Message ID | 20190924124433.96810-5-slp@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce the microvm machine type | expand |
On 9/24/19 2:44 PM, Sergio Lopez wrote: > Split up PCMachineState and PCMachineClass and derive X86MachineState > and X86MachineClass from them. This allows sharing code with non-PC > machine types. > > Also, move shared functions from pc.c to x86.c. > > Signed-off-by: Sergio Lopez <slp@redhat.com> > --- > hw/acpi/cpu_hotplug.c | 10 +- > hw/i386/Makefile.objs | 1 + > hw/i386/acpi-build.c | 31 +- > hw/i386/amd_iommu.c | 4 +- > hw/i386/intel_iommu.c | 4 +- > hw/i386/pc.c | 796 +++++------------------------------------- > hw/i386/pc_piix.c | 48 +-- > hw/i386/pc_q35.c | 38 +- > hw/i386/pc_sysfw.c | 60 +--- > hw/i386/x86.c | 788 +++++++++++++++++++++++++++++++++++++++++ > hw/intc/ioapic.c | 3 +- > include/hw/i386/pc.h | 29 +- > include/hw/i386/x86.h | 97 +++++ > 13 files changed, 1045 insertions(+), 864 deletions(-) > create mode 100644 hw/i386/x86.c > create mode 100644 include/hw/i386/x86.h > > diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c > index 6e8293aac9..3ac2045a95 100644 > --- a/hw/acpi/cpu_hotplug.c > +++ b/hw/acpi/cpu_hotplug.c > @@ -128,7 +128,7 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, > Aml *one = aml_int(1); > MachineClass *mc = MACHINE_GET_CLASS(machine); > const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); > - PCMachineState *pcms = PC_MACHINE(machine); > + X86MachineState *x86ms = X86_MACHINE(machine); > > /* > * _MAT method - creates an madt apic buffer > @@ -236,9 +236,9 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, > /* The current AML generator can cover the APIC ID range [0..255], > * inclusive, for VCPU hotplug. */ > QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256); > - if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { > + if (x86ms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { > error_report("max_cpus is too large. 
APIC ID of last CPU is %u", > - pcms->apic_id_limit - 1); > + x86ms->apic_id_limit - 1); > exit(1); > } > > @@ -315,8 +315,8 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, > * ith up to 255 elements. Windows guests up to win2k8 fail when > * VarPackageOp is used. > */ > - pkg = pcms->apic_id_limit <= 255 ? aml_package(pcms->apic_id_limit) : > - aml_varpackage(pcms->apic_id_limit); > + pkg = x86ms->apic_id_limit <= 255 ? aml_package(x86ms->apic_id_limit) : > + aml_varpackage(x86ms->apic_id_limit); > > for (i = 0, apic_idx = 0; i < apic_ids->len; i++) { > int apic_id = apic_ids->cpus[i].arch_id; > diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs > index 149712db07..5b4b3a672e 100644 > --- a/hw/i386/Makefile.objs > +++ b/hw/i386/Makefile.objs > @@ -1,6 +1,7 @@ > obj-$(CONFIG_KVM) += kvm/ > obj-y += multiboot.o > obj-y += pvh.o > +obj-y += x86.o > obj-y += pc.o > obj-y += e820.o > obj-$(CONFIG_I440FX) += pc_piix.o > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index e54e571a75..76e18d3285 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -29,6 +29,7 @@ > #include "hw/pci/pci.h" > #include "hw/core/cpu.h" > #include "target/i386/cpu.h" > +#include "hw/i386/x86.h" > #include "hw/misc/pvpanic.h" > #include "hw/timer/hpet.h" > #include "hw/acpi/acpi-defs.h" > @@ -361,6 +362,7 @@ static void > build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) > { > MachineClass *mc = MACHINE_GET_CLASS(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); > int madt_start = table_data->len; > AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); > @@ -390,7 +392,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) > io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); > io_apic->interrupt = cpu_to_le32(0); > > - if (pcms->apic_xrupt_override) { > + if (x86ms->apic_xrupt_override) { > 
intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); > intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; > intsrcovr->length = sizeof(*intsrcovr); > @@ -1817,8 +1819,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, > CrsRangeEntry *entry; > Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; > CrsRangeSet crs_range_set; > - PCMachineState *pcms = PC_MACHINE(machine); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); > + X86MachineState *x86ms = X86_MACHINE(machine); > AcpiMcfgInfo mcfg; > uint32_t nr_mem = machine->ram_slots; > int root_bus_limit = 0xFF; > @@ -2083,7 +2085,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, > * with half of the 16-bit control register. Hence, the total size > * of the i/o region used is FW_CFG_CTL_SIZE; when using DMA, the > * DMA control register is located at FW_CFG_DMA_IO_BASE + 4 */ > - uint8_t io_size = object_property_get_bool(OBJECT(pcms->fw_cfg), > + uint8_t io_size = object_property_get_bool(OBJECT(x86ms->fw_cfg), > "dma_enabled", NULL) ? 
> ROUND_UP(FW_CFG_CTL_SIZE, 4) + sizeof(dma_addr_t) : > FW_CFG_CTL_SIZE; > @@ -2318,6 +2320,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > MachineClass *mc = MACHINE_GET_CLASS(machine); > const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); > PCMachineState *pcms = PC_MACHINE(machine); > + X86MachineState *x86ms = X86_MACHINE(machine); > ram_addr_t hotplugabble_address_space_size = > object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, > NULL); > @@ -2386,16 +2389,16 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > } > > /* Cut out the ACPI_PCI hole */ > - if (mem_base <= pcms->below_4g_mem_size && > - next_base > pcms->below_4g_mem_size) { > - mem_len -= next_base - pcms->below_4g_mem_size; > + if (mem_base <= x86ms->below_4g_mem_size && > + next_base > x86ms->below_4g_mem_size) { > + mem_len -= next_base - x86ms->below_4g_mem_size; > if (mem_len > 0) { > numamem = acpi_data_push(table_data, sizeof *numamem); > build_srat_memory(numamem, mem_base, mem_len, i - 1, > MEM_AFFINITY_ENABLED); > } > mem_base = 1ULL << 32; > - mem_len = next_base - pcms->below_4g_mem_size; > + mem_len = next_base - x86ms->below_4g_mem_size; > next_base = mem_base + mem_len; > } > > @@ -2614,6 +2617,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) > { > PCMachineState *pcms = PC_MACHINE(machine); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + X86MachineState *x86ms = X86_MACHINE(machine); > GArray *table_offsets; > unsigned facs, dsdt, rsdt, fadt; > AcpiPmInfo pm; > @@ -2775,7 +2779,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) > */ > int legacy_aml_len = > pcmc->legacy_acpi_table_size + > - ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit; > + ACPI_BUILD_LEGACY_CPU_AML_SIZE * x86ms->apic_id_limit; > int legacy_table_size = > ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, > ACPI_BUILD_ALIGN_SIZE); > @@ -2865,13 +2869,14 @@ void 
acpi_setup(void) > { > PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > AcpiBuildTables tables; > AcpiBuildState *build_state; > Object *vmgenid_dev; > TPMIf *tpm; > static FwCfgTPMConfig tpm_config; > > - if (!pcms->fw_cfg) { > + if (!x86ms->fw_cfg) { > ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); > return; > } > @@ -2902,7 +2907,7 @@ void acpi_setup(void) > acpi_add_rom_blob(acpi_build_update, build_state, > tables.linker->cmd_blob, "etc/table-loader", 0); > > - fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, > + fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, > tables.tcpalog->data, acpi_data_len(tables.tcpalog)); > > tpm = tpm_find(); > @@ -2912,13 +2917,13 @@ void acpi_setup(void) > .tpm_version = tpm_get_version(tpm), > .tpmppi_version = TPM_PPI_VERSION_1_30 > }; > - fw_cfg_add_file(pcms->fw_cfg, "etc/tpm/config", > + fw_cfg_add_file(x86ms->fw_cfg, "etc/tpm/config", > &tpm_config, sizeof tpm_config); > } > > vmgenid_dev = find_vmgenid_dev(); > if (vmgenid_dev) { > - vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg, > + vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), x86ms->fw_cfg, > tables.vmgenid); > } > > @@ -2931,7 +2936,7 @@ void acpi_setup(void) > uint32_t rsdp_size = acpi_data_len(tables.rsdp); > > build_state->rsdp = g_memdup(tables.rsdp->data, rsdp_size); > - fw_cfg_add_file_callback(pcms->fw_cfg, ACPI_BUILD_RSDP_FILE, > + fw_cfg_add_file_callback(x86ms->fw_cfg, ACPI_BUILD_RSDP_FILE, > acpi_build_update, NULL, build_state, > build_state->rsdp, rsdp_size, true); > build_state->rsdp_mr = NULL; > diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c > index 08884523e2..bb3b5b4563 100644 > --- a/hw/i386/amd_iommu.c > +++ b/hw/i386/amd_iommu.c > @@ -21,6 +21,7 @@ > */ > > #include "qemu/osdep.h" > +#include "hw/i386/x86.h" > #include "hw/i386/pc.h" > #include "hw/pci/msi.h" > #include "hw/pci/pci_bus.h" > @@ -1537,6 +1538,7 @@ static 
void amdvi_realize(DeviceState *dev, Error **err) > X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); > MachineState *ms = MACHINE(qdev_get_machine()); > PCMachineState *pcms = PC_MACHINE(ms); > + X86MachineState *x86ms = X86_MACHINE(ms); > PCIBus *bus = pcms->bus; > > s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, > @@ -1565,7 +1567,7 @@ static void amdvi_realize(DeviceState *dev, Error **err) > } > > /* Pseudo address space under root PCI bus. */ > - pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); > + x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); > > /* set up MMIO */ > memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 75ca6f9c70..21f091c654 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -29,6 +29,7 @@ > #include "hw/pci/pci.h" > #include "hw/pci/pci_bus.h" > #include "hw/qdev-properties.h" > +#include "hw/i386/x86.h" > #include "hw/i386/pc.h" > #include "hw/i386/apic-msidef.h" > #include "hw/boards.h" > @@ -3703,6 +3704,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) > { > MachineState *ms = MACHINE(qdev_get_machine()); > PCMachineState *pcms = PC_MACHINE(ms); > + X86MachineState *x86ms = X86_MACHINE(ms); > PCIBus *bus = pcms->bus; > IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); > X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); > @@ -3743,7 +3745,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) > sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); > pci_setup_iommu(bus, vtd_host_dma_iommu, dev); > /* Pseudo address space under root PCI bus. 
*/ > - pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); > + x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); > } > > static void vtd_class_init(ObjectClass *klass, void *data) > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 3920aa7e85..d18b461f01 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -24,6 +24,7 @@ > > #include "qemu/osdep.h" > #include "qemu/units.h" > +#include "hw/i386/x86.h" > #include "hw/i386/pc.h" > #include "hw/char/serial.h" > #include "hw/char/parallel.h" > @@ -676,6 +677,7 @@ void pc_cmos_init(PCMachineState *pcms, > BusState *idebus0, BusState *idebus1, > ISADevice *s) > { > + X86MachineState *x86ms = X86_MACHINE(pcms); > int val; > static pc_cmos_init_late_arg arg; > > @@ -683,12 +685,12 @@ void pc_cmos_init(PCMachineState *pcms, > > /* memory size */ > /* base memory (first MiB) */ > - val = MIN(pcms->below_4g_mem_size / KiB, 640); > + val = MIN(x86ms->below_4g_mem_size / KiB, 640); > rtc_set_memory(s, 0x15, val); > rtc_set_memory(s, 0x16, val >> 8); > /* extended memory (next 64MiB) */ > - if (pcms->below_4g_mem_size > 1 * MiB) { > - val = (pcms->below_4g_mem_size - 1 * MiB) / KiB; > + if (x86ms->below_4g_mem_size > 1 * MiB) { > + val = (x86ms->below_4g_mem_size - 1 * MiB) / KiB; > } else { > val = 0; > } > @@ -699,8 +701,8 @@ void pc_cmos_init(PCMachineState *pcms, > rtc_set_memory(s, 0x30, val); > rtc_set_memory(s, 0x31, val >> 8); > /* memory between 16MiB and 4GiB */ > - if (pcms->below_4g_mem_size > 16 * MiB) { > - val = (pcms->below_4g_mem_size - 16 * MiB) / (64 * KiB); > + if (x86ms->below_4g_mem_size > 16 * MiB) { > + val = (x86ms->below_4g_mem_size - 16 * MiB) / (64 * KiB); > } else { > val = 0; > } > @@ -709,20 +711,20 @@ void pc_cmos_init(PCMachineState *pcms, > rtc_set_memory(s, 0x34, val); > rtc_set_memory(s, 0x35, val >> 8); > /* memory above 4GiB */ > - val = pcms->above_4g_mem_size / 65536; > + val = x86ms->above_4g_mem_size / 65536; > rtc_set_memory(s, 0x5b, val); > 
rtc_set_memory(s, 0x5c, val >> 8); > rtc_set_memory(s, 0x5d, val >> 16); > > - object_property_add_link(OBJECT(pcms), "rtc_state", > + object_property_add_link(OBJECT(x86ms), "rtc_state", > TYPE_ISA_DEVICE, > - (Object **)&pcms->rtc, > + (Object **)&x86ms->rtc, > object_property_allow_set_link, > OBJ_PROP_LINK_STRONG, &error_abort); > - object_property_set_link(OBJECT(pcms), OBJECT(s), > + object_property_set_link(OBJECT(x86ms), OBJECT(s), > "rtc_state", &error_abort); > > - set_boot_dev(s, MACHINE(pcms)->boot_order, &error_fatal); > + set_boot_dev(s, MACHINE(x86ms)->boot_order, &error_fatal); > > val = 0; > val |= 0x02; /* FPU is there */ > @@ -863,35 +865,6 @@ static void handle_a20_line_change(void *opaque, int irq, int level) > x86_cpu_set_a20(cpu, level); > } > > -/* Calculates initial APIC ID for a specific CPU index > - * > - * Currently we need to be able to calculate the APIC ID from the CPU index > - * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have > - * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of > - * all CPUs up to max_cpus. 
> - */ > -static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, > - unsigned int cpu_index) > -{ > - MachineState *ms = MACHINE(pcms); > - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > - uint32_t correct_id; > - static bool warned; > - > - correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores, > - ms->smp.threads, cpu_index); > - if (pcmc->compat_apic_id_mode) { > - if (cpu_index != correct_id && !warned && !qtest_enabled()) { > - error_report("APIC IDs set in compatibility mode, " > - "CPU topology won't match the configuration"); > - warned = true; > - } > - return cpu_index; > - } else { > - return correct_id; > - } > -} > - > static void pc_build_smbios(PCMachineState *pcms) > { > uint8_t *smbios_tables, *smbios_anchor; > @@ -899,6 +872,7 @@ static void pc_build_smbios(PCMachineState *pcms) > struct smbios_phys_mem_area *mem_array; > unsigned i, array_count; > MachineState *ms = MACHINE(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); > > /* tell smbios about cpuid version and features */ > @@ -906,7 +880,7 @@ static void pc_build_smbios(PCMachineState *pcms) > > smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); > if (smbios_tables) { > - fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, > + fw_cfg_add_bytes(x86ms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, > smbios_tables, smbios_tables_len); > } > > @@ -927,9 +901,9 @@ static void pc_build_smbios(PCMachineState *pcms) > g_free(mem_array); > > if (smbios_anchor) { > - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-tables", > + fw_cfg_add_file(x86ms->fw_cfg, "etc/smbios/smbios-tables", > smbios_tables, smbios_tables_len); > - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-anchor", > + fw_cfg_add_file(x86ms->fw_cfg, "etc/smbios/smbios-anchor", > smbios_anchor, smbios_anchor_len); > } > } > @@ -942,10 +916,11 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) > const CPUArchIdList 
*cpus; > MachineClass *mc = MACHINE_GET_CLASS(pcms); > MachineState *ms = MACHINE(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > int nb_numa_nodes = ms->numa_state->num_nodes; > > fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); > - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); > + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); > > /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: > * > @@ -959,7 +934,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) > * So for compatibility reasons with old BIOSes we are stuck with > * "etc/max-cpus" actually being apic_id_limit > */ > - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit); > + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)x86ms->apic_id_limit); > fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); > fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, > acpi_tables, acpi_tables_len); > @@ -972,374 +947,25 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) > * of nodes, one word for each VCPU->node and one word for each node to > * hold the amount of memory. 
> */ > - numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); > + numa_fw_cfg = g_new0(uint64_t, 1 + x86ms->apic_id_limit + nb_numa_nodes); > numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); > cpus = mc->possible_cpu_arch_ids(MACHINE(pcms)); > for (i = 0; i < cpus->len; i++) { > unsigned int apic_id = cpus->cpus[i].arch_id; > - assert(apic_id < pcms->apic_id_limit); > + assert(apic_id < x86ms->apic_id_limit); > numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); > } > for (i = 0; i < nb_numa_nodes; i++) { > - numa_fw_cfg[pcms->apic_id_limit + 1 + i] = > + numa_fw_cfg[x86ms->apic_id_limit + 1 + i] = > cpu_to_le64(ms->numa_state->nodes[i].node_mem); > } > fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, > - (1 + pcms->apic_id_limit + nb_numa_nodes) * > + (1 + x86ms->apic_id_limit + nb_numa_nodes) * > sizeof(*numa_fw_cfg)); > > return fw_cfg; > } > > -static long get_file_size(FILE *f) > -{ > - long where, size; > - > - /* XXX: on Unix systems, using fstat() probably makes more sense */ > - > - where = ftell(f); > - fseek(f, 0, SEEK_END); > - size = ftell(f); > - fseek(f, where, SEEK_SET); > - > - return size; > -} > - > -struct setup_data { > - uint64_t next; > - uint32_t type; > - uint32_t len; > - uint8_t data[0]; > -} __attribute__((packed)); > - > -static void load_linux(PCMachineState *pcms, > - FWCfgState *fw_cfg) > -{ > - uint16_t protocol; > - int setup_size, kernel_size, cmdline_size; > - int dtb_size, setup_data_offset; > - uint32_t initrd_max; > - uint8_t header[8192], *setup, *kernel; > - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; > - FILE *f; > - char *vmode; > - MachineState *machine = MACHINE(pcms); > - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > - struct setup_data *setup_data; > - const char *kernel_filename = machine->kernel_filename; > - const char *initrd_filename = machine->initrd_filename; > - const char *dtb_filename = machine->dtb; > - const char *kernel_cmdline = 
machine->kernel_cmdline; > - > - /* Align to 16 bytes as a paranoia measure */ > - cmdline_size = (strlen(kernel_cmdline)+16) & ~15; > - > - /* load the kernel header */ > - f = fopen(kernel_filename, "rb"); > - if (!f || !(kernel_size = get_file_size(f)) || > - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != > - MIN(ARRAY_SIZE(header), kernel_size)) { > - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", > - kernel_filename, strerror(errno)); > - exit(1); > - } > - > - /* kernel protocol version */ > -#if 0 > - fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); > -#endif > - if (ldl_p(header+0x202) == 0x53726448) { > - protocol = lduw_p(header+0x206); > - } else { > - size_t pvh_start_addr; > - uint32_t mh_load_addr = 0; > - uint32_t elf_kernel_size = 0; > - /* > - * This could be a multiboot kernel. If it is, let's stop treating it > - * like a Linux kernel. > - * Note: some multiboot images could be in the ELF format (the same of > - * PVH), so we try multiboot first since we check the multiboot magic > - * header before to load it. > - */ > - if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, > - kernel_cmdline, kernel_size, header)) { > - return; > - } > - /* > - * Check if the file is an uncompressed kernel file (ELF) and load it, > - * saving the PVH entry point used by the x86/HVM direct boot ABI. > - * If load_elfboot() is successful, populate the fw_cfg info. 
> - */ > - if (pcmc->pvh_enabled && > - pvh_load_elfboot(kernel_filename, > - &mh_load_addr, &elf_kernel_size)) { > - fclose(f); > - > - pvh_start_addr = pvh_get_start_addr(); > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); > - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); > - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, > - strlen(kernel_cmdline) + 1); > - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); > - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, > - header, sizeof(header)); > - > - /* load initrd */ > - if (initrd_filename) { > - GMappedFile *mapped_file; > - gsize initrd_size; > - gchar *initrd_data; > - GError *gerr = NULL; > - > - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); > - if (!mapped_file) { > - fprintf(stderr, "qemu: error reading initrd %s: %s\n", > - initrd_filename, gerr->message); > - exit(1); > - } > - pcms->initrd_mapped_file = mapped_file; > - > - initrd_data = g_mapped_file_get_contents(mapped_file); > - initrd_size = g_mapped_file_get_length(mapped_file); > - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; > - if (initrd_size >= initrd_max) { > - fprintf(stderr, "qemu: initrd is too large, cannot support." 
> - "(max: %"PRIu32", need %"PRId64")\n", > - initrd_max, (uint64_t)initrd_size); > - exit(1); > - } > - > - initrd_addr = (initrd_max - initrd_size) & ~4095; > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); > - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); > - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, > - initrd_size); > - } > - > - option_rom[nb_option_roms].bootindex = 0; > - option_rom[nb_option_roms].name = "pvh.bin"; > - nb_option_roms++; > - > - return; > - } > - protocol = 0; > - } > - > - if (protocol < 0x200 || !(header[0x211] & 0x01)) { > - /* Low kernel */ > - real_addr = 0x90000; > - cmdline_addr = 0x9a000 - cmdline_size; > - prot_addr = 0x10000; > - } else if (protocol < 0x202) { > - /* High but ancient kernel */ > - real_addr = 0x90000; > - cmdline_addr = 0x9a000 - cmdline_size; > - prot_addr = 0x100000; > - } else { > - /* High and recent kernel */ > - real_addr = 0x10000; > - cmdline_addr = 0x20000; > - prot_addr = 0x100000; > - } > - > -#if 0 > - fprintf(stderr, > - "qemu: real_addr = 0x" TARGET_FMT_plx "\n" > - "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" > - "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", > - real_addr, > - cmdline_addr, > - prot_addr); > -#endif > - > - /* highest address for loading the initrd */ > - if (protocol >= 0x20c && > - lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { > - /* > - * Linux has supported initrd up to 4 GB for a very long time (2007, > - * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), > - * though it only sets initrd_max to 2 GB to "work around bootloader > - * bugs". Luckily, QEMU firmware(which does something like bootloader) > - * has supported this. > - * > - * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can > - * be loaded into any address. > - * > - * In addition, initrd_max is uint32_t simply because QEMU doesn't > - * support the 64-bit boot protocol (specifically the ext_ramdisk_image > - * field). 
> - * > - * Therefore here just limit initrd_max to UINT32_MAX simply as well. > - */ > - initrd_max = UINT32_MAX; > - } else if (protocol >= 0x203) { > - initrd_max = ldl_p(header+0x22c); > - } else { > - initrd_max = 0x37ffffff; > - } > - > - if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) { > - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; > - } > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); > - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); > - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); > - > - if (protocol >= 0x202) { > - stl_p(header+0x228, cmdline_addr); > - } else { > - stw_p(header+0x20, 0xA33F); > - stw_p(header+0x22, cmdline_addr-real_addr); > - } > - > - /* handle vga= parameter */ > - vmode = strstr(kernel_cmdline, "vga="); > - if (vmode) { > - unsigned int video_mode; > - /* skip "vga=" */ > - vmode += 4; > - if (!strncmp(vmode, "normal", 6)) { > - video_mode = 0xffff; > - } else if (!strncmp(vmode, "ext", 3)) { > - video_mode = 0xfffe; > - } else if (!strncmp(vmode, "ask", 3)) { > - video_mode = 0xfffd; > - } else { > - video_mode = strtol(vmode, NULL, 0); > - } > - stw_p(header+0x1fa, video_mode); > - } > - > - /* loader type */ > - /* High nybble = B reserved for QEMU; low nybble is revision number. > - If this code is substantially changed, you may want to consider > - incrementing the revision. 
*/ > - if (protocol >= 0x200) { > - header[0x210] = 0xB0; > - } > - /* heap */ > - if (protocol >= 0x201) { > - header[0x211] |= 0x80; /* CAN_USE_HEAP */ > - stw_p(header+0x224, cmdline_addr-real_addr-0x200); > - } > - > - /* load initrd */ > - if (initrd_filename) { > - GMappedFile *mapped_file; > - gsize initrd_size; > - gchar *initrd_data; > - GError *gerr = NULL; > - > - if (protocol < 0x200) { > - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); > - exit(1); > - } > - > - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); > - if (!mapped_file) { > - fprintf(stderr, "qemu: error reading initrd %s: %s\n", > - initrd_filename, gerr->message); > - exit(1); > - } > - pcms->initrd_mapped_file = mapped_file; > - > - initrd_data = g_mapped_file_get_contents(mapped_file); > - initrd_size = g_mapped_file_get_length(mapped_file); > - if (initrd_size >= initrd_max) { > - fprintf(stderr, "qemu: initrd is too large, cannot support." > - "(max: %"PRIu32", need %"PRId64")\n", > - initrd_max, (uint64_t)initrd_size); > - exit(1); > - } > - > - initrd_addr = (initrd_max-initrd_size) & ~4095; > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); > - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); > - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); > - > - stl_p(header+0x218, initrd_addr); > - stl_p(header+0x21c, initrd_size); > - } > - > - /* load kernel and setup */ > - setup_size = header[0x1f1]; > - if (setup_size == 0) { > - setup_size = 4; > - } > - setup_size = (setup_size+1)*512; > - if (setup_size > kernel_size) { > - fprintf(stderr, "qemu: invalid kernel header\n"); > - exit(1); > - } > - kernel_size -= setup_size; > - > - setup = g_malloc(setup_size); > - kernel = g_malloc(kernel_size); > - fseek(f, 0, SEEK_SET); > - if (fread(setup, 1, setup_size, f) != setup_size) { > - fprintf(stderr, "fread() failed\n"); > - exit(1); > - } > - if (fread(kernel, 1, kernel_size, f) != kernel_size) { > - 
fprintf(stderr, "fread() failed\n"); > - exit(1); > - } > - fclose(f); > - > - /* append dtb to kernel */ > - if (dtb_filename) { > - if (protocol < 0x209) { > - fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); > - exit(1); > - } > - > - dtb_size = get_image_size(dtb_filename); > - if (dtb_size <= 0) { > - fprintf(stderr, "qemu: error reading dtb %s: %s\n", > - dtb_filename, strerror(errno)); > - exit(1); > - } > - > - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); > - kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; > - kernel = g_realloc(kernel, kernel_size); > - > - stq_p(header+0x250, prot_addr + setup_data_offset); > - > - setup_data = (struct setup_data *)(kernel + setup_data_offset); > - setup_data->next = 0; > - setup_data->type = cpu_to_le32(SETUP_DTB); > - setup_data->len = cpu_to_le32(dtb_size); > - > - load_image_size(dtb_filename, setup_data->data, dtb_size); > - } > - > - memcpy(setup, header, MIN(sizeof(header), setup_size)); > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); > - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); > - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); > - > - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); > - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); > - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); > - > - option_rom[nb_option_roms].bootindex = 0; > - option_rom[nb_option_roms].name = "linuxboot.bin"; > - if (pcmc->linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { > - option_rom[nb_option_roms].name = "linuxboot_dma.bin"; > - } > - nb_option_roms++; > -} > - > #define NE2000_NB_MAX 6 > > static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, > @@ -1376,157 +1002,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) > } > } > > -static void pc_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp) > -{ > - Object *cpu = NULL; > - Error *local_err = NULL; > - 
CPUX86State *env = NULL; > - > - cpu = object_new(MACHINE(pcms)->cpu_type); > - > - env = &X86_CPU(cpu)->env; > - env->nr_dies = pcms->smp_dies; > - > - object_property_set_uint(cpu, apic_id, "apic-id", &local_err); > - object_property_set_bool(cpu, true, "realized", &local_err); > - > - object_unref(cpu); > - error_propagate(errp, local_err); > -} > - > -/* > - * This function is very similar to smp_parse() > - * in hw/core/machine.c but includes CPU die support. > - */ > -void pc_smp_parse(MachineState *ms, QemuOpts *opts) > -{ > - PCMachineState *pcms = PC_MACHINE(ms); > - > - if (opts) { > - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); > - unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); > - unsigned dies = qemu_opt_get_number(opts, "dies", 1); > - unsigned cores = qemu_opt_get_number(opts, "cores", 0); > - unsigned threads = qemu_opt_get_number(opts, "threads", 0); > - > - /* compute missing values, prefer sockets over cores over threads */ > - if (cpus == 0 || sockets == 0) { > - cores = cores > 0 ? cores : 1; > - threads = threads > 0 ? threads : 1; > - if (cpus == 0) { > - sockets = sockets > 0 ? sockets : 1; > - cpus = cores * threads * dies * sockets; > - } else { > - ms->smp.max_cpus = > - qemu_opt_get_number(opts, "maxcpus", cpus); > - sockets = ms->smp.max_cpus / (cores * threads * dies); > - } > - } else if (cores == 0) { > - threads = threads > 0 ? threads : 1; > - cores = cpus / (sockets * dies * threads); > - cores = cores > 0 ? cores : 1; > - } else if (threads == 0) { > - threads = cpus / (cores * dies * sockets); > - threads = threads > 0 ? 
threads : 1; > - } else if (sockets * dies * cores * threads < cpus) { > - error_report("cpu topology: " > - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " > - "smp_cpus (%u)", > - sockets, dies, cores, threads, cpus); > - exit(1); > - } > - > - ms->smp.max_cpus = > - qemu_opt_get_number(opts, "maxcpus", cpus); > - > - if (ms->smp.max_cpus < cpus) { > - error_report("maxcpus must be equal to or greater than smp"); > - exit(1); > - } > - > - if (sockets * dies * cores * threads > ms->smp.max_cpus) { > - error_report("cpu topology: " > - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " > - "maxcpus (%u)", > - sockets, dies, cores, threads, > - ms->smp.max_cpus); > - exit(1); > - } > - > - if (sockets * dies * cores * threads != ms->smp.max_cpus) { > - warn_report("Invalid CPU topology deprecated: " > - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " > - "!= maxcpus (%u)", > - sockets, dies, cores, threads, > - ms->smp.max_cpus); > - } > - > - ms->smp.cpus = cpus; > - ms->smp.cores = cores; > - ms->smp.threads = threads; > - pcms->smp_dies = dies; > - } > - > - if (ms->smp.cpus > 1) { > - Error *blocker = NULL; > - error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); > - replay_add_blocker(blocker); > - } > -} > - > -void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) > -{ > - PCMachineState *pcms = PC_MACHINE(ms); > - int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id); > - Error *local_err = NULL; > - > - if (id < 0) { > - error_setg(errp, "Invalid CPU id: %" PRIi64, id); > - return; > - } > - > - if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { > - error_setg(errp, "Unable to add CPU: %" PRIi64 > - ", resulting APIC ID (%" PRIi64 ") is too large", > - id, apic_id); > - return; > - } > - > - pc_new_cpu(PC_MACHINE(ms), apic_id, &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - return; > - } > -} > - > -void pc_cpus_init(PCMachineState *pcms) > -{ > - int i; > - const CPUArchIdList 
*possible_cpus; > - MachineState *ms = MACHINE(pcms); > - MachineClass *mc = MACHINE_GET_CLASS(pcms); > - PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); > - > - x86_cpu_set_default_version(pcmc->default_cpu_version); > - > - /* Calculates the limit to CPU APIC ID values > - * > - * Limit for the APIC ID value, so that all > - * CPU APIC IDs are < pcms->apic_id_limit. > - * > - * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). > - */ > - pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, > - ms->smp.max_cpus - 1) + 1; > - possible_cpus = mc->possible_cpu_arch_ids(ms); > - for (i = 0; i < ms->smp.cpus; i++) { > - pc_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal); > - } > -} > - > static void pc_build_feature_control_file(PCMachineState *pcms) > { > MachineState *ms = MACHINE(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); > CPUX86State *env = &cpu->env; > uint32_t unused, ecx, edx; > @@ -1550,7 +1029,7 @@ static void pc_build_feature_control_file(PCMachineState *pcms) > > val = g_malloc(sizeof(*val)); > *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED); > - fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); > + fw_cfg_add_file(x86ms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); > } > > static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) > @@ -1571,10 +1050,11 @@ void pc_machine_done(Notifier *notifier, void *data) > { > PCMachineState *pcms = container_of(notifier, > PCMachineState, machine_done); > + X86MachineState *x86ms = X86_MACHINE(pcms); > PCIBus *bus = pcms->bus; > > /* set the number of CPUs */ > - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); > + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); > > if (bus) { > int extra_hosts = 0; > @@ -1585,23 +1065,23 @@ void pc_machine_done(Notifier *notifier, void *data) > extra_hosts++; > } > } > - if (extra_hosts && pcms->fw_cfg) { > + if 
(extra_hosts && x86ms->fw_cfg) { > uint64_t *val = g_malloc(sizeof(*val)); > *val = cpu_to_le64(extra_hosts); > - fw_cfg_add_file(pcms->fw_cfg, > + fw_cfg_add_file(x86ms->fw_cfg, > "etc/extra-pci-roots", val, sizeof(*val)); > } > } > > acpi_setup(); > - if (pcms->fw_cfg) { > + if (x86ms->fw_cfg) { > pc_build_smbios(pcms); > pc_build_feature_control_file(pcms); > /* update FW_CFG_NB_CPUS to account for -device added CPUs */ > - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); > + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); > } > > - if (pcms->apic_id_limit > 255 && !xen_enabled()) { > + if (x86ms->apic_id_limit > 255 && !xen_enabled()) { > IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); > > if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) || > @@ -1619,8 +1099,9 @@ void pc_guest_info_init(PCMachineState *pcms) > { > int i; > MachineState *ms = MACHINE(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > > - pcms->apic_xrupt_override = kvm_allows_irq0_override(); > + x86ms->apic_xrupt_override = kvm_allows_irq0_override(); > pcms->numa_nodes = ms->numa_state->num_nodes; > pcms->node_mem = g_malloc0(pcms->numa_nodes * > sizeof *pcms->node_mem); > @@ -1645,14 +1126,17 @@ void xen_load_linux(PCMachineState *pcms) > { > int i; > FWCfgState *fw_cfg; > + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > > assert(MACHINE(pcms)->kernel_filename != NULL); > > fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE); > - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); > + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); > rom_set_fw(fw_cfg); > > - load_linux(pcms, fw_cfg); > + load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, > + pcmc->linuxboot_dma_enabled, pcmc->pvh_enabled); > for (i = 0; i < nb_option_roms; i++) { > assert(!strcmp(option_rom[i].name, "linuxboot.bin") || > !strcmp(option_rom[i].name, "linuxboot_dma.bin") || > @@ -1660,7 +1144,7 
@@ void xen_load_linux(PCMachineState *pcms) > !strcmp(option_rom[i].name, "multiboot.bin")); > rom_add_option(option_rom[i].name, option_rom[i].bootindex); > } > - pcms->fw_cfg = fw_cfg; > + x86ms->fw_cfg = fw_cfg; > } > > void pc_memory_init(PCMachineState *pcms, > @@ -1673,10 +1157,11 @@ void pc_memory_init(PCMachineState *pcms, > MemoryRegion *ram_below_4g, *ram_above_4g; > FWCfgState *fw_cfg; > MachineState *machine = MACHINE(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > > - assert(machine->ram_size == pcms->below_4g_mem_size + > - pcms->above_4g_mem_size); > + assert(machine->ram_size == x86ms->below_4g_mem_size + > + x86ms->above_4g_mem_size); > > linux_boot = (machine->kernel_filename != NULL); > > @@ -1690,17 +1175,17 @@ void pc_memory_init(PCMachineState *pcms, > *ram_memory = ram; > ram_below_4g = g_malloc(sizeof(*ram_below_4g)); > memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, > - 0, pcms->below_4g_mem_size); > + 0, x86ms->below_4g_mem_size); > memory_region_add_subregion(system_memory, 0, ram_below_4g); > - e820_add_entry(0, pcms->below_4g_mem_size, E820_RAM); > - if (pcms->above_4g_mem_size > 0) { > + e820_add_entry(0, x86ms->below_4g_mem_size, E820_RAM); > + if (x86ms->above_4g_mem_size > 0) { > ram_above_4g = g_malloc(sizeof(*ram_above_4g)); > memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram, > - pcms->below_4g_mem_size, > - pcms->above_4g_mem_size); > + x86ms->below_4g_mem_size, > + x86ms->above_4g_mem_size); > memory_region_add_subregion(system_memory, 0x100000000ULL, > ram_above_4g); > - e820_add_entry(0x100000000ULL, pcms->above_4g_mem_size, E820_RAM); > + e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM); > } > > if (!pcmc->has_reserved_memory && > @@ -1735,7 +1220,7 @@ void pc_memory_init(PCMachineState *pcms, > } > > machine->device_memory->base = > - ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1 * GiB); > + 
ROUND_UP(0x100000000ULL + x86ms->above_4g_mem_size, 1 * GiB); > > if (pcmc->enforce_aligned_dimm) { > /* size device region assuming 1G page max alignment per slot */ > @@ -1786,16 +1271,17 @@ void pc_memory_init(PCMachineState *pcms, > } > > if (linux_boot) { > - load_linux(pcms, fw_cfg); > + load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, > + pcmc->linuxboot_dma_enabled, pcmc->pvh_enabled); > } > > for (i = 0; i < nb_option_roms; i++) { > rom_add_option(option_rom[i].name, option_rom[i].bootindex); > } > - pcms->fw_cfg = fw_cfg; > + x86ms->fw_cfg = fw_cfg; > > /* Init default IOAPIC address space */ > - pcms->ioapic_as = &address_space_memory; > + x86ms->ioapic_as = &address_space_memory; > } > > /* > @@ -1807,6 +1293,7 @@ uint64_t pc_pci_hole64_start(void) > PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > MachineState *ms = MACHINE(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > uint64_t hole64_start = 0; > > if (pcmc->has_reserved_memory && ms->device_memory->base) { > @@ -1815,7 +1302,7 @@ uint64_t pc_pci_hole64_start(void) > hole64_start += memory_region_size(&ms->device_memory->mr); > } > } else { > - hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; > + hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size; > } > > return ROUND_UP(hole64_start, 1 * GiB); > @@ -2154,6 +1641,7 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, > Error *local_err = NULL; > X86CPU *cpu = X86_CPU(dev); > PCMachineState *pcms = PC_MACHINE(hotplug_dev); > + X86MachineState *x86ms = X86_MACHINE(pcms); > > if (pcms->acpi_dev) { > hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); > @@ -2163,12 +1651,12 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, > } > > /* increment the number of CPUs */ > - pcms->boot_cpus++; > - if (pcms->rtc) { > - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); > + x86ms->boot_cpus++; > + if (x86ms->rtc) { > + rtc_set_cpus_count(x86ms->rtc, 
x86ms->boot_cpus); > } > - if (pcms->fw_cfg) { > - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); > + if (x86ms->fw_cfg) { > + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); > } > > found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL); > @@ -2214,6 +1702,7 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, > Error *local_err = NULL; > X86CPU *cpu = X86_CPU(dev); > PCMachineState *pcms = PC_MACHINE(hotplug_dev); > + X86MachineState *x86ms = X86_MACHINE(pcms); > > hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); > if (local_err) { > @@ -2225,10 +1714,10 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, > object_property_set_bool(OBJECT(dev), false, "realized", NULL); > > /* decrement the number of CPUs */ > - pcms->boot_cpus--; > + x86ms->boot_cpus--; > /* Update the number of CPUs in CMOS */ > - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); > - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); > + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); > + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); > out: > error_propagate(errp, local_err); > } > @@ -2244,6 +1733,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, > CPUX86State *env = &cpu->env; > MachineState *ms = MACHINE(hotplug_dev); > PCMachineState *pcms = PC_MACHINE(hotplug_dev); > + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); > unsigned int smp_cores = ms->smp.cores; > unsigned int smp_threads = ms->smp.threads; > > @@ -2253,7 +1743,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, > return; > } > > - env->nr_dies = pcms->smp_dies; > + env->nr_dies = x86ms->smp_dies; > > /* > * If APIC ID is not set, > @@ -2261,13 +1751,13 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, > */ > if (cpu->apic_id == UNASSIGNED_APIC_ID) { > int max_socket = (ms->smp.max_cpus - 1) / > - smp_threads / smp_cores / pcms->smp_dies; > + smp_threads / smp_cores / 
x86ms->smp_dies; > > /* > * die-id was optional in QEMU 4.0 and older, so keep it optional > * if there's only one die per socket. > */ > - if (cpu->die_id < 0 && pcms->smp_dies == 1) { > + if (cpu->die_id < 0 && x86ms->smp_dies == 1) { > cpu->die_id = 0; > } > > @@ -2282,9 +1772,9 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, > if (cpu->die_id < 0) { > error_setg(errp, "CPU die-id is not set"); > return; > - } else if (cpu->die_id > pcms->smp_dies - 1) { > + } else if (cpu->die_id > x86ms->smp_dies - 1) { > error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", > - cpu->die_id, pcms->smp_dies - 1); > + cpu->die_id, x86ms->smp_dies - 1); > return; > } > if (cpu->core_id < 0) { > @@ -2308,7 +1798,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, > topo.die_id = cpu->die_id; > topo.core_id = cpu->core_id; > topo.smt_id = cpu->thread_id; > - cpu->apic_id = apicid_from_topo_ids(pcms->smp_dies, smp_cores, > + cpu->apic_id = apicid_from_topo_ids(x86ms->smp_dies, smp_cores, > smp_threads, &topo); > } > > @@ -2316,7 +1806,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, > if (!cpu_slot) { > MachineState *ms = MACHINE(pcms); > > - x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, > + x86_topo_ids_from_apicid(cpu->apic_id, x86ms->smp_dies, > smp_cores, smp_threads, &topo); > error_setg(errp, > "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" > @@ -2338,7 +1828,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, > /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() > * once -smp refactoring is complete and there will be CPU private > * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ > - x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, > + x86_topo_ids_from_apicid(cpu->apic_id, x86ms->smp_dies, > smp_cores, smp_threads, &topo); > if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { > error_setg(errp, "property socket-id: %u doesn't match 
set apic-id:" > @@ -2520,45 +2010,6 @@ pc_machine_get_device_memory_region_size(Object *obj, Visitor *v, > visit_type_int(v, name, &value, errp); > } > > -static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, > - const char *name, void *opaque, > - Error **errp) > -{ > - PCMachineState *pcms = PC_MACHINE(obj); > - uint64_t value = pcms->max_ram_below_4g; > - > - visit_type_size(v, name, &value, errp); > -} > - > -static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, > - const char *name, void *opaque, > - Error **errp) > -{ > - PCMachineState *pcms = PC_MACHINE(obj); > - Error *error = NULL; > - uint64_t value; > - > - visit_type_size(v, name, &value, &error); > - if (error) { > - error_propagate(errp, error); > - return; > - } > - if (value > 4 * GiB) { > - error_setg(&error, > - "Machine option 'max-ram-below-4g=%"PRIu64 > - "' expects size less than or equal to 4G", value); > - error_propagate(errp, error); > - return; > - } > - > - if (value < 1 * MiB) { > - warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," > - "BIOS may not work with less than 1MiB", value); > - } > - > - pcms->max_ram_below_4g = value; > -} > - > static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name, > void *opaque, Error **errp) > { > @@ -2664,7 +2115,6 @@ static void pc_machine_initfn(Object *obj) > { > PCMachineState *pcms = PC_MACHINE(obj); > > - pcms->max_ram_below_4g = 0; /* use default */ > pcms->smm = ON_OFF_AUTO_AUTO; > #ifdef CONFIG_VMPORT > pcms->vmport = ON_OFF_AUTO_AUTO; > @@ -2676,7 +2126,6 @@ static void pc_machine_initfn(Object *obj) > pcms->smbus_enabled = true; > pcms->sata_enabled = true; > pcms->pit_enabled = true; > - pcms->smp_dies = 1; > > pc_system_flash_create(pcms); > } > @@ -2707,85 +2156,6 @@ static void pc_machine_wakeup(MachineState *machine) > cpu_synchronize_all_post_reset(); > } > > -static CpuInstanceProperties > -pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index) > -{ > - 
MachineClass *mc = MACHINE_GET_CLASS(ms); > - const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); > - > - assert(cpu_index < possible_cpus->len); > - return possible_cpus->cpus[cpu_index].props; > -} > - > -static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx) > -{ > - X86CPUTopoInfo topo; > - PCMachineState *pcms = PC_MACHINE(ms); > - > - assert(idx < ms->possible_cpus->len); > - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, > - pcms->smp_dies, ms->smp.cores, > - ms->smp.threads, &topo); > - return topo.pkg_id % ms->numa_state->num_nodes; > -} > - > -static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) > -{ > - PCMachineState *pcms = PC_MACHINE(ms); > - int i; > - unsigned int max_cpus = ms->smp.max_cpus; > - > - if (ms->possible_cpus) { > - /* > - * make sure that max_cpus hasn't changed since the first use, i.e. > - * -smp hasn't been parsed after it > - */ > - assert(ms->possible_cpus->len == max_cpus); > - return ms->possible_cpus; > - } > - > - ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + > - sizeof(CPUArchId) * max_cpus); > - ms->possible_cpus->len = max_cpus; > - for (i = 0; i < ms->possible_cpus->len; i++) { > - X86CPUTopoInfo topo; > - > - ms->possible_cpus->cpus[i].type = ms->cpu_type; > - ms->possible_cpus->cpus[i].vcpus_count = 1; > - ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); > - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, > - pcms->smp_dies, ms->smp.cores, > - ms->smp.threads, &topo); > - ms->possible_cpus->cpus[i].props.has_socket_id = true; > - ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; > - if (pcms->smp_dies > 1) { > - ms->possible_cpus->cpus[i].props.has_die_id = true; > - ms->possible_cpus->cpus[i].props.die_id = topo.die_id; > - } > - ms->possible_cpus->cpus[i].props.has_core_id = true; > - ms->possible_cpus->cpus[i].props.core_id = topo.core_id; > - ms->possible_cpus->cpus[i].props.has_thread_id = 
true; > - ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; > - } > - return ms->possible_cpus; > -} > - > -static void x86_nmi(NMIState *n, int cpu_index, Error **errp) > -{ > - /* cpu index isn't used */ > - CPUState *cs; > - > - CPU_FOREACH(cs) { > - X86CPU *cpu = X86_CPU(cs); > - > - if (!cpu->apic_state) { > - cpu_interrupt(cs, CPU_INTERRUPT_NMI); > - } else { > - apic_deliver_nmi(cpu->apic_state); > - } > - } > -} > - > static void pc_machine_class_init(ObjectClass *oc, void *data) > { > MachineClass *mc = MACHINE_CLASS(oc); > @@ -2810,14 +2180,11 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) > pcmc->pvh_enabled = true; > assert(!mc->get_hotplug_handler); > mc->get_hotplug_handler = pc_get_hotplug_handler; > - mc->cpu_index_to_instance_props = pc_cpu_index_to_props; > - mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; > - mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; > mc->auto_enable_numa_with_memhp = true; > mc->has_hotpluggable_cpus = true; > mc->default_boot_order = "cad"; > - mc->hot_add_cpu = pc_hot_add_cpu; > - mc->smp_parse = pc_smp_parse; > + mc->hot_add_cpu = x86_hot_add_cpu; > + mc->smp_parse = x86_smp_parse; > mc->block_default_type = IF_IDE; > mc->max_cpus = 255; > mc->reset = pc_machine_reset; > @@ -2835,13 +2202,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) > pc_machine_get_device_memory_region_size, NULL, > NULL, NULL, &error_abort); > > - object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", > - pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, > - NULL, NULL, &error_abort); > - > - object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G, > - "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); > - > object_class_property_add(oc, PC_MACHINE_SMM, "OnOffAuto", > pc_machine_get_smm, pc_machine_set_smm, > NULL, NULL, &error_abort); > @@ -2866,7 +2226,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) > > 
static const TypeInfo pc_machine_info = { > .name = TYPE_PC_MACHINE, > - .parent = TYPE_MACHINE, > + .parent = TYPE_X86_MACHINE, > .abstract = true, > .instance_size = sizeof(PCMachineState), > .instance_init = pc_machine_initfn, > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > index 2362675149..f63c27bc74 100644 > --- a/hw/i386/pc_piix.c > +++ b/hw/i386/pc_piix.c > @@ -27,6 +27,7 @@ > > #include "qemu/units.h" > #include "hw/loader.h" > +#include "hw/i386/x86.h" > #include "hw/i386/pc.h" > #include "hw/i386/apic.h" > #include "hw/display/ramfb.h" > @@ -73,6 +74,7 @@ static void pc_init1(MachineState *machine, > { > PCMachineState *pcms = PC_MACHINE(machine); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > MemoryRegion *system_memory = get_system_memory(); > MemoryRegion *system_io = get_system_io(); > int i; > @@ -125,11 +127,11 @@ static void pc_init1(MachineState *machine, > if (xen_enabled()) { > xen_hvm_init(pcms, &ram_memory); > } else { > - if (!pcms->max_ram_below_4g) { > - pcms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ > + if (!x86ms->max_ram_below_4g) { > + x86ms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ > } > - lowmem = pcms->max_ram_below_4g; > - if (machine->ram_size >= pcms->max_ram_below_4g) { > + lowmem = x86ms->max_ram_below_4g; > + if (machine->ram_size >= x86ms->max_ram_below_4g) { > if (pcmc->gigabyte_align) { > if (lowmem > 0xc0000000) { > lowmem = 0xc0000000; > @@ -138,21 +140,21 @@ static void pc_init1(MachineState *machine, > warn_report("Large machine and max_ram_below_4g " > "(%" PRIu64 ") not a multiple of 1G; " > "possible bad performance.", > - pcms->max_ram_below_4g); > + x86ms->max_ram_below_4g); > } > } > } > > if (machine->ram_size >= lowmem) { > - pcms->above_4g_mem_size = machine->ram_size - lowmem; > - pcms->below_4g_mem_size = lowmem; > + x86ms->above_4g_mem_size = machine->ram_size - lowmem; > + x86ms->below_4g_mem_size = lowmem; > } else { > - 
pcms->above_4g_mem_size = 0; > - pcms->below_4g_mem_size = machine->ram_size; > + x86ms->above_4g_mem_size = 0; > + x86ms->below_4g_mem_size = machine->ram_size; > } > } > > - pc_cpus_init(pcms); > + x86_cpus_init(x86ms, pcmc->default_cpu_version); > > if (kvm_enabled() && pcmc->kvmclock_enabled) { > kvmclock_create(); > @@ -190,19 +192,19 @@ static void pc_init1(MachineState *machine, > gsi_state = g_malloc0(sizeof(*gsi_state)); > if (kvm_ioapic_in_kernel()) { > kvm_pc_setup_irq_routing(pcmc->pci_enabled); > - pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, > - GSI_NUM_PINS); > + x86ms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, > + GSI_NUM_PINS); > } else { > - pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); > + x86ms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); > } > > if (pcmc->pci_enabled) { > pci_bus = i440fx_init(host_type, > pci_type, > - &i440fx_state, &piix3_devfn, &isa_bus, pcms->gsi, > + &i440fx_state, &piix3_devfn, &isa_bus, x86ms->gsi, > system_memory, system_io, machine->ram_size, > - pcms->below_4g_mem_size, > - pcms->above_4g_mem_size, > + x86ms->below_4g_mem_size, > + x86ms->above_4g_mem_size, > pci_memory, ram_memory); > pcms->bus = pci_bus; > } else { > @@ -212,7 +214,7 @@ static void pc_init1(MachineState *machine, > &error_abort); > no_hpet = 1; > } > - isa_bus_irqs(isa_bus, pcms->gsi); > + isa_bus_irqs(isa_bus, x86ms->gsi); > > if (kvm_pic_in_kernel()) { > i8259 = kvm_i8259_init(isa_bus); > @@ -230,7 +232,7 @@ static void pc_init1(MachineState *machine, > ioapic_init_gsi(gsi_state, "i440fx"); > } > > - pc_register_ferr_irq(pcms->gsi[13]); > + pc_register_ferr_irq(x86ms->gsi[13]); > > pc_vga_init(isa_bus, pcmc->pci_enabled ? 
pci_bus : NULL); > > @@ -240,7 +242,7 @@ static void pc_init1(MachineState *machine, > } > > /* init basic PC hardware */ > - pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true, > + pc_basic_device_init(isa_bus, x86ms->gsi, &rtc_state, true, > (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit_enabled, > 0x4); > > @@ -288,7 +290,7 @@ else { > smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0); > /* TODO: Populate SPD eeprom data. */ > smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, > - pcms->gsi[9], smi_irq, > + x86ms->gsi[9], smi_irq, > pc_machine_is_smm_enabled(pcms), > &piix4_pm); > smbus_eeprom_init(smbus, 8, NULL, 0); > @@ -304,7 +306,7 @@ else { > > if (machine->nvdimms_state->is_enabled) { > nvdimm_init_acpi_state(machine->nvdimms_state, system_io, > - pcms->fw_cfg, OBJECT(pcms)); > + x86ms->fw_cfg, OBJECT(pcms)); > } > } > > @@ -728,7 +730,7 @@ DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4_fn, > > static void pc_i440fx_1_3_machine_options(MachineClass *m) > { > - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); > + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); > static GlobalProperty compat[] = { > PC_CPU_MODEL_IDS("1.3.0") > { "usb-tablet", "usb_version", "1" }, > @@ -739,7 +741,7 @@ static void pc_i440fx_1_3_machine_options(MachineClass *m) > > pc_i440fx_1_4_machine_options(m); > m->hw_version = "1.3.0"; > - pcmc->compat_apic_id_mode = true; > + x86mc->compat_apic_id_mode = true; > compat_props_add(m->compat_props, compat, G_N_ELEMENTS(compat)); > } > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > index d4e8a1cb9f..71f71bc61d 100644 > --- a/hw/i386/pc_q35.c > +++ b/hw/i386/pc_q35.c > @@ -41,6 +41,7 @@ > #include "hw/pci-host/q35.h" > #include "hw/qdev-properties.h" > #include "exec/address-spaces.h" > +#include "hw/i386/x86.h" > #include "hw/i386/pc.h" > #include "hw/i386/ich9.h" > #include "hw/i386/amd_iommu.h" > @@ -115,6 +116,7 @@ static void pc_q35_init(MachineState *machine) > { > PCMachineState *pcms = 
PC_MACHINE(machine); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + X86MachineState *x86ms = X86_MACHINE(pcms); > Q35PCIHost *q35_host; > PCIHostState *phb; > PCIBus *host_bus; > @@ -152,34 +154,34 @@ static void pc_q35_init(MachineState *machine) > /* Handle the machine opt max-ram-below-4g. It is basically doing > * min(qemu limit, user limit). > */ > - if (!pcms->max_ram_below_4g) { > - pcms->max_ram_below_4g = 1ULL << 32; /* default: 4G */; > + if (!x86ms->max_ram_below_4g) { > + x86ms->max_ram_below_4g = 1ULL << 32; /* default: 4G */; > } > - if (lowmem > pcms->max_ram_below_4g) { > - lowmem = pcms->max_ram_below_4g; > + if (lowmem > x86ms->max_ram_below_4g) { > + lowmem = x86ms->max_ram_below_4g; > if (machine->ram_size - lowmem > lowmem && > lowmem & (1 * GiB - 1)) { > warn_report("There is possibly poor performance as the ram size " > " (0x%" PRIx64 ") is more then twice the size of" > " max-ram-below-4g (%"PRIu64") and" > " max-ram-below-4g is not a multiple of 1G.", > - (uint64_t)machine->ram_size, pcms->max_ram_below_4g); > + (uint64_t)machine->ram_size, x86ms->max_ram_below_4g); > } > } > > if (machine->ram_size >= lowmem) { > - pcms->above_4g_mem_size = machine->ram_size - lowmem; > - pcms->below_4g_mem_size = lowmem; > + x86ms->above_4g_mem_size = machine->ram_size - lowmem; > + x86ms->below_4g_mem_size = lowmem; > } else { > - pcms->above_4g_mem_size = 0; > - pcms->below_4g_mem_size = machine->ram_size; > + x86ms->above_4g_mem_size = 0; > + x86ms->below_4g_mem_size = machine->ram_size; > } > > if (xen_enabled()) { > xen_hvm_init(pcms, &ram_memory); > } > > - pc_cpus_init(pcms); > + x86_cpus_init(x86ms, pcmc->default_cpu_version); > > kvmclock_create(); > > @@ -213,10 +215,10 @@ static void pc_q35_init(MachineState *machine) > gsi_state = g_malloc0(sizeof(*gsi_state)); > if (kvm_ioapic_in_kernel()) { > kvm_pc_setup_irq_routing(pcmc->pci_enabled); > - pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, > + x86ms->gsi = 
qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, > GSI_NUM_PINS); > } else { > - pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); > + x86ms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); > } > > /* create pci host bus */ > @@ -231,9 +233,9 @@ static void pc_q35_init(MachineState *machine) > MCH_HOST_PROP_SYSTEM_MEM, NULL); > object_property_set_link(OBJECT(q35_host), OBJECT(system_io), > MCH_HOST_PROP_IO_MEM, NULL); > - object_property_set_int(OBJECT(q35_host), pcms->below_4g_mem_size, > + object_property_set_int(OBJECT(q35_host), x86ms->below_4g_mem_size, > PCI_HOST_BELOW_4G_MEM_SIZE, NULL); > - object_property_set_int(OBJECT(q35_host), pcms->above_4g_mem_size, > + object_property_set_int(OBJECT(q35_host), x86ms->above_4g_mem_size, > PCI_HOST_ABOVE_4G_MEM_SIZE, NULL); > /* pci */ > qdev_init_nofail(DEVICE(q35_host)); > @@ -255,7 +257,7 @@ static void pc_q35_init(MachineState *machine) > ich9_lpc = ICH9_LPC_DEVICE(lpc); > lpc_dev = DEVICE(lpc); > for (i = 0; i < GSI_NUM_PINS; i++) { > - qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, pcms->gsi[i]); > + qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]); > } > pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc, > ICH9_LPC_NB_PIRQS); > @@ -279,7 +281,7 @@ static void pc_q35_init(MachineState *machine) > ioapic_init_gsi(gsi_state, "q35"); > } > > - pc_register_ferr_irq(pcms->gsi[13]); > + pc_register_ferr_irq(x86ms->gsi[13]); > > assert(pcms->vmport != ON_OFF_AUTO__MAX); > if (pcms->vmport == ON_OFF_AUTO_AUTO) { > @@ -287,7 +289,7 @@ static void pc_q35_init(MachineState *machine) > } > > /* init basic PC hardware */ > - pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy, > + pc_basic_device_init(isa_bus, x86ms->gsi, &rtc_state, !mc->no_floppy, > (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit_enabled, > 0xff0104); > > @@ -330,7 +332,7 @@ static void pc_q35_init(MachineState *machine) > > if 
(machine->nvdimms_state->is_enabled) { > nvdimm_init_acpi_state(machine->nvdimms_state, system_io, > - pcms->fw_cfg, OBJECT(pcms)); > + x86ms->fw_cfg, OBJECT(pcms)); > } > } > > diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c > index a9983f0bfb..97f38e0423 100644 > --- a/hw/i386/pc_sysfw.c > +++ b/hw/i386/pc_sysfw.c > @@ -31,6 +31,7 @@ > #include "qemu/option.h" > #include "qemu/units.h" > #include "hw/sysbus.h" > +#include "hw/i386/x86.h" > #include "hw/i386/pc.h" > #include "hw/loader.h" > #include "hw/qdev-properties.h" > @@ -38,8 +39,6 @@ > #include "hw/block/flash.h" > #include "sysemu/kvm.h" > > -#define BIOS_FILENAME "bios.bin" > - > /* > * We don't have a theoretically justifiable exact lower bound on the base > * address of any flash mapping. In practice, the IO-APIC MMIO range is > @@ -211,59 +210,6 @@ static void pc_system_flash_map(PCMachineState *pcms, > } > } > > -static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) > -{ > - char *filename; > - MemoryRegion *bios, *isa_bios; > - int bios_size, isa_bios_size; > - int ret; > - > - /* BIOS load */ > - if (bios_name == NULL) { > - bios_name = BIOS_FILENAME; > - } > - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); > - if (filename) { > - bios_size = get_image_size(filename); > - } else { > - bios_size = -1; > - } > - if (bios_size <= 0 || > - (bios_size % 65536) != 0) { > - goto bios_error; > - } > - bios = g_malloc(sizeof(*bios)); > - memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); > - if (!isapc_ram_fw) { > - memory_region_set_readonly(bios, true); > - } > - ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); > - if (ret != 0) { > - bios_error: > - fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); > - exit(1); > - } > - g_free(filename); > - > - /* map the last 128KB of the BIOS in ISA space */ > - isa_bios_size = MIN(bios_size, 128 * KiB); > - isa_bios = g_malloc(sizeof(*isa_bios)); > - 
memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, > - bios_size - isa_bios_size, isa_bios_size); > - memory_region_add_subregion_overlap(rom_memory, > - 0x100000 - isa_bios_size, > - isa_bios, > - 1); > - if (!isapc_ram_fw) { > - memory_region_set_readonly(isa_bios, true); > - } > - > - /* map all the bios at the top of memory */ > - memory_region_add_subregion(rom_memory, > - (uint32_t)(-bios_size), > - bios); > -} > - > void pc_system_firmware_init(PCMachineState *pcms, > MemoryRegion *rom_memory) > { > @@ -272,7 +218,7 @@ void pc_system_firmware_init(PCMachineState *pcms, > BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; > > if (!pcmc->pci_enabled) { > - old_pc_system_rom_init(rom_memory, true); > + x86_system_rom_init(rom_memory, true); > return; > } > > @@ -293,7 +239,7 @@ void pc_system_firmware_init(PCMachineState *pcms, > > if (!pflash_blk[0]) { > /* Machine property pflash0 not set, use ROM mode */ > - old_pc_system_rom_init(rom_memory, false); > + x86_system_rom_init(rom_memory, false); > } else { > if (kvm_enabled() && !kvm_readonly_mem_enabled()) { > /* > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > new file mode 100644 > index 0000000000..4de9dd100f > --- /dev/null > +++ b/hw/i386/x86.c > @@ -0,0 +1,788 @@ > +/* > + * Copyright (c) 2003-2004 Fabrice Bellard > + * Copyright (c) 2019 Red Hat, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > +#include "qemu/osdep.h" > +#include "qemu/error-report.h" > +#include "qemu/option.h" > +#include "qemu/cutils.h" > +#include "qemu/units.h" > +#include "qapi/error.h" > +#include "qapi/qmp/qerror.h" > +#include "qapi/qapi-visit-common.h" > +#include "qapi/visitor.h" > +#include "sysemu/qtest.h" > +#include "sysemu/numa.h" > +#include "sysemu/replay.h" > +#include "sysemu/sysemu.h" > + > +#include "hw/i386/x86.h" > +#include "target/i386/cpu.h" > +#include "hw/i386/topology.h" > +#include "hw/i386/fw_cfg.h" > +#include "hw/acpi/cpu_hotplug.h" > +#include "hw/nmi.h" > +#include "hw/loader.h" > +#include "multiboot.h" > +#include "pvh.h" > +#include "standard-headers/asm-x86/bootparam.h" > + > +#define BIOS_FILENAME "bios.bin" > + > +/* Calculates initial APIC ID for a specific CPU index > + * > + * Currently we need to be able to calculate the APIC ID from the CPU index > + * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have > + * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of > + * all CPUs up to max_cpus. 
> + */ > +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, > + unsigned int cpu_index) > +{ > + MachineState *ms = MACHINE(x86ms); > + X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms); > + uint32_t correct_id; > + static bool warned; > + > + correct_id = x86_apicid_from_cpu_idx(x86ms->smp_dies, ms->smp.cores, > + ms->smp.threads, cpu_index); > + if (x86mc->compat_apic_id_mode) { > + if (cpu_index != correct_id && !warned && !qtest_enabled()) { > + error_report("APIC IDs set in compatibility mode, " > + "CPU topology won't match the configuration"); > + warned = true; > + } > + return cpu_index; > + } else { > + return correct_id; > + } > +} > + > + > +static void x86_new_cpu(X86MachineState *x86ms, int64_t apic_id, Error **errp) > +{ > + Object *cpu = NULL; > + Error *local_err = NULL; > + CPUX86State *env = NULL; > + > + cpu = object_new(MACHINE(x86ms)->cpu_type); > + > + env = &X86_CPU(cpu)->env; > + env->nr_dies = x86ms->smp_dies; > + > + object_property_set_uint(cpu, apic_id, "apic-id", &local_err); > + object_property_set_bool(cpu, true, "realized", &local_err); > + > + object_unref(cpu); > + error_propagate(errp, local_err); > +} > + > +/* > + * This function is very similar to smp_parse() > + * in hw/core/machine.c but includes CPU die support. > + */ > +void x86_smp_parse(MachineState *ms, QemuOpts *opts) > +{ > + X86MachineState *x86ms = X86_MACHINE(ms); > + > + if (opts) { > + unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); > + unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); > + unsigned dies = qemu_opt_get_number(opts, "dies", 1); > + unsigned cores = qemu_opt_get_number(opts, "cores", 0); > + unsigned threads = qemu_opt_get_number(opts, "threads", 0); > + > + /* compute missing values, prefer sockets over cores over threads */ > + if (cpus == 0 || sockets == 0) { > + cores = cores > 0 ? cores : 1; > + threads = threads > 0 ? threads : 1; > + if (cpus == 0) { > + sockets = sockets > 0 ? 
sockets : 1; > + cpus = cores * threads * dies * sockets; > + } else { > + ms->smp.max_cpus = > + qemu_opt_get_number(opts, "maxcpus", cpus); > + sockets = ms->smp.max_cpus / (cores * threads * dies); > + } > + } else if (cores == 0) { > + threads = threads > 0 ? threads : 1; > + cores = cpus / (sockets * dies * threads); > + cores = cores > 0 ? cores : 1; > + } else if (threads == 0) { > + threads = cpus / (cores * dies * sockets); > + threads = threads > 0 ? threads : 1; > + } else if (sockets * dies * cores * threads < cpus) { > + error_report("cpu topology: " > + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " > + "smp_cpus (%u)", > + sockets, dies, cores, threads, cpus); > + exit(1); > + } > + > + ms->smp.max_cpus = > + qemu_opt_get_number(opts, "maxcpus", cpus); > + > + if (ms->smp.max_cpus < cpus) { > + error_report("maxcpus must be equal to or greater than smp"); > + exit(1); > + } > + > + if (sockets * dies * cores * threads > ms->smp.max_cpus) { > + error_report("cpu topology: " > + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " > + "maxcpus (%u)", > + sockets, dies, cores, threads, > + ms->smp.max_cpus); > + exit(1); > + } > + > + if (sockets * dies * cores * threads != ms->smp.max_cpus) { > + warn_report("Invalid CPU topology deprecated: " > + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " > + "!= maxcpus (%u)", > + sockets, dies, cores, threads, > + ms->smp.max_cpus); > + } > + > + ms->smp.cpus = cpus; > + ms->smp.cores = cores; > + ms->smp.threads = threads; > + x86ms->smp_dies = dies; > + } > + > + if (ms->smp.cpus > 1) { > + Error *blocker = NULL; > + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); > + replay_add_blocker(blocker); > + } > +} > + > +void x86_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) > +{ > + X86MachineState *x86ms = X86_MACHINE(ms); > + int64_t apic_id = x86_cpu_apic_id_from_index(x86ms, id); > + Error *local_err = NULL; > + > + if (id < 0) { > + error_setg(errp, "Invalid CPU 
id: %" PRIi64, id); > + return; > + } > + > + if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { > + error_setg(errp, "Unable to add CPU: %" PRIi64 > + ", resulting APIC ID (%" PRIi64 ") is too large", > + id, apic_id); > + return; > + } > + > + x86_new_cpu(X86_MACHINE(ms), apic_id, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > +} > + > +void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) > +{ > + int i; > + const CPUArchIdList *possible_cpus; > + MachineState *ms = MACHINE(x86ms); > + MachineClass *mc = MACHINE_GET_CLASS(x86ms); > + > + x86_cpu_set_default_version(default_cpu_version); > + > + /* Calculates the limit to CPU APIC ID values > + * > + * Limit for the APIC ID value, so that all > + * CPU APIC IDs are < x86ms->apic_id_limit. > + * > + * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). > + */ > + x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, > + ms->smp.max_cpus - 1) + 1; > + possible_cpus = mc->possible_cpu_arch_ids(ms); > + for (i = 0; i < ms->smp.cpus; i++) { > + x86_new_cpu(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); > + } > +} > + > +void x86_nmi(NMIState *n, int cpu_index, Error **errp) > +{ > + /* cpu index isn't used */ > + CPUState *cs; > + > + CPU_FOREACH(cs) { > + X86CPU *cpu = X86_CPU(cs); > + > + if (!cpu->apic_state) { > + cpu_interrupt(cs, CPU_INTERRUPT_NMI); > + } else { > + apic_deliver_nmi(cpu->apic_state); > + } > + } > +} > + > +CpuInstanceProperties > +x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) > +{ > + MachineClass *mc = MACHINE_GET_CLASS(ms); > + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); > + > + assert(cpu_index < possible_cpus->len); > + return possible_cpus->cpus[cpu_index].props; > +} > + > +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) > +{ > + X86CPUTopoInfo topo; > + X86MachineState *x86ms = X86_MACHINE(ms); > + > + assert(idx < ms->possible_cpus->len); > + 
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, > + x86ms->smp_dies, ms->smp.cores, > + ms->smp.threads, &topo); > + return topo.pkg_id % ms->numa_state->num_nodes; > +} > + > +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) > +{ > + X86MachineState *x86ms = X86_MACHINE(ms); > + int i; > + unsigned int max_cpus = ms->smp.max_cpus; > + > + if (ms->possible_cpus) { > + /* > + * make sure that max_cpus hasn't changed since the first use, i.e. > + * -smp hasn't been parsed after it > + */ > + assert(ms->possible_cpus->len == max_cpus); > + return ms->possible_cpus; > + } > + > + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + > + sizeof(CPUArchId) * max_cpus); > + ms->possible_cpus->len = max_cpus; > + for (i = 0; i < ms->possible_cpus->len; i++) { > + X86CPUTopoInfo topo; > + > + ms->possible_cpus->cpus[i].type = ms->cpu_type; > + ms->possible_cpus->cpus[i].vcpus_count = 1; > + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(x86ms, i); > + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, > + x86ms->smp_dies, ms->smp.cores, > + ms->smp.threads, &topo); > + ms->possible_cpus->cpus[i].props.has_socket_id = true; > + ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; > + if (x86ms->smp_dies > 1) { > + ms->possible_cpus->cpus[i].props.has_die_id = true; > + ms->possible_cpus->cpus[i].props.die_id = topo.die_id; > + } > + ms->possible_cpus->cpus[i].props.has_core_id = true; > + ms->possible_cpus->cpus[i].props.core_id = topo.core_id; > + ms->possible_cpus->cpus[i].props.has_thread_id = true; > + ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; > + } > + return ms->possible_cpus; > +} > + > +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) > +{ > + char *filename; > + MemoryRegion *bios, *isa_bios; > + int bios_size, isa_bios_size; > + int ret; > + > + /* BIOS load */ > + if (bios_name == NULL) { > + bios_name = BIOS_FILENAME; > + } > + filename = 
qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); > + if (filename) { > + bios_size = get_image_size(filename); > + } else { > + bios_size = -1; > + } > + if (bios_size <= 0 || > + (bios_size % 65536) != 0) { > + goto bios_error; > + } > + bios = g_malloc(sizeof(*bios)); > + memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); > + if (!isapc_ram_fw) { > + memory_region_set_readonly(bios, true); > + } > + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); > + if (ret != 0) { > + bios_error: > + fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); > + exit(1); > + } > + g_free(filename); > + > + /* map the last 128KB of the BIOS in ISA space */ > + isa_bios_size = MIN(bios_size, 128 * KiB); > + isa_bios = g_malloc(sizeof(*isa_bios)); > + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, > + bios_size - isa_bios_size, isa_bios_size); > + memory_region_add_subregion_overlap(rom_memory, > + 0x100000 - isa_bios_size, > + isa_bios, > + 1); > + if (!isapc_ram_fw) { > + memory_region_set_readonly(isa_bios, true); > + } > + > + /* map all the bios at the top of memory */ > + memory_region_add_subregion(rom_memory, > + (uint32_t)(-bios_size), > + bios); > +} > + > +static long get_file_size(FILE *f) > +{ > + long where, size; > + > + /* XXX: on Unix systems, using fstat() probably makes more sense */ > + > + where = ftell(f); > + fseek(f, 0, SEEK_END); > + size = ftell(f); > + fseek(f, where, SEEK_SET); > + > + return size; > +} > + > +struct setup_data { > + uint64_t next; > + uint32_t type; > + uint32_t len; > + uint8_t data[0]; > +} __attribute__((packed)); > + > +void load_linux(X86MachineState *x86ms, > + FWCfgState *fw_cfg, > + unsigned acpi_data_size, > + bool linuxboot_dma_enabled, > + bool pvh_enabled) > +{ > + uint16_t protocol; > + int setup_size, kernel_size, cmdline_size; > + int dtb_size, setup_data_offset; > + uint32_t initrd_max; > + uint8_t header[8192], *setup, *kernel; > + hwaddr real_addr, prot_addr, 
cmdline_addr, initrd_addr = 0; > + FILE *f; > + char *vmode; > + MachineState *machine = MACHINE(x86ms); > + struct setup_data *setup_data; > + const char *kernel_filename = machine->kernel_filename; > + const char *initrd_filename = machine->initrd_filename; > + const char *dtb_filename = machine->dtb; > + const char *kernel_cmdline = machine->kernel_cmdline; > + > + /* Align to 16 bytes as a paranoia measure */ > + cmdline_size = (strlen(kernel_cmdline)+16) & ~15; > + > + /* load the kernel header */ > + f = fopen(kernel_filename, "rb"); > + if (!f || !(kernel_size = get_file_size(f)) || > + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != > + MIN(ARRAY_SIZE(header), kernel_size)) { > + fprintf(stderr, "qemu: could not load kernel '%s': %s\n", > + kernel_filename, strerror(errno)); > + exit(1); > + } > + > + /* kernel protocol version */ > +#if 0 > + fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); > +#endif > + if (ldl_p(header+0x202) == 0x53726448) { > + protocol = lduw_p(header+0x206); > + } else { > + size_t pvh_start_addr; > + uint32_t mh_load_addr = 0; > + uint32_t elf_kernel_size = 0; > + /* > + * This could be a multiboot kernel. If it is, let's stop treating it > + * like a Linux kernel. > + * Note: some multiboot images could be in the ELF format (the same of > + * PVH), so we try multiboot first since we check the multiboot magic > + * header before to load it. > + */ > + if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, > + kernel_cmdline, kernel_size, header)) { > + return; > + } > + /* > + * Check if the file is an uncompressed kernel file (ELF) and load it, > + * saving the PVH entry point used by the x86/HVM direct boot ABI. > + * If load_elfboot() is successful, populate the fw_cfg info. 
> + */ > + if (pvh_enabled && > + pvh_load_elfboot(kernel_filename, > + &mh_load_addr, &elf_kernel_size)) { > + fclose(f); > + > + pvh_start_addr = pvh_get_start_addr(); > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); > + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); > + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, > + strlen(kernel_cmdline) + 1); > + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); > + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, > + header, sizeof(header)); > + > + /* load initrd */ > + if (initrd_filename) { > + GMappedFile *mapped_file; > + gsize initrd_size; > + gchar *initrd_data; > + GError *gerr = NULL; > + > + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); > + if (!mapped_file) { > + fprintf(stderr, "qemu: error reading initrd %s: %s\n", > + initrd_filename, gerr->message); > + exit(1); > + } > + x86ms->initrd_mapped_file = mapped_file; > + > + initrd_data = g_mapped_file_get_contents(mapped_file); > + initrd_size = g_mapped_file_get_length(mapped_file); > + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; > + if (initrd_size >= initrd_max) { > + fprintf(stderr, "qemu: initrd is too large, cannot support." 
> + "(max: %"PRIu32", need %"PRId64")\n", > + initrd_max, (uint64_t)initrd_size); > + exit(1); > + } > + > + initrd_addr = (initrd_max - initrd_size) & ~4095; > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); > + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); > + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, > + initrd_size); > + } > + > + option_rom[nb_option_roms].bootindex = 0; > + option_rom[nb_option_roms].name = "pvh.bin"; > + nb_option_roms++; > + > + return; > + } > + protocol = 0; > + } > + > + if (protocol < 0x200 || !(header[0x211] & 0x01)) { > + /* Low kernel */ > + real_addr = 0x90000; > + cmdline_addr = 0x9a000 - cmdline_size; > + prot_addr = 0x10000; > + } else if (protocol < 0x202) { > + /* High but ancient kernel */ > + real_addr = 0x90000; > + cmdline_addr = 0x9a000 - cmdline_size; > + prot_addr = 0x100000; > + } else { > + /* High and recent kernel */ > + real_addr = 0x10000; > + cmdline_addr = 0x20000; > + prot_addr = 0x100000; > + } > + > +#if 0 > + fprintf(stderr, > + "qemu: real_addr = 0x" TARGET_FMT_plx "\n" > + "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" > + "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", > + real_addr, > + cmdline_addr, > + prot_addr); > +#endif > + > + /* highest address for loading the initrd */ > + if (protocol >= 0x20c && > + lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { > + /* > + * Linux has supported initrd up to 4 GB for a very long time (2007, > + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), > + * though it only sets initrd_max to 2 GB to "work around bootloader > + * bugs". Luckily, QEMU firmware(which does something like bootloader) > + * has supported this. > + * > + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can > + * be loaded into any address. > + * > + * In addition, initrd_max is uint32_t simply because QEMU doesn't > + * support the 64-bit boot protocol (specifically the ext_ramdisk_image > + * field). 
> + * > + * Therefore here just limit initrd_max to UINT32_MAX simply as well. > + */ > + initrd_max = UINT32_MAX; > + } else if (protocol >= 0x203) { > + initrd_max = ldl_p(header+0x22c); > + } else { > + initrd_max = 0x37ffffff; > + } > + > + if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { > + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; > + } > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); > + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); > + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); > + > + if (protocol >= 0x202) { > + stl_p(header+0x228, cmdline_addr); > + } else { > + stw_p(header+0x20, 0xA33F); > + stw_p(header+0x22, cmdline_addr-real_addr); > + } > + > + /* handle vga= parameter */ > + vmode = strstr(kernel_cmdline, "vga="); > + if (vmode) { > + unsigned int video_mode; > + /* skip "vga=" */ > + vmode += 4; > + if (!strncmp(vmode, "normal", 6)) { > + video_mode = 0xffff; > + } else if (!strncmp(vmode, "ext", 3)) { > + video_mode = 0xfffe; > + } else if (!strncmp(vmode, "ask", 3)) { > + video_mode = 0xfffd; > + } else { > + video_mode = strtol(vmode, NULL, 0); > + } > + stw_p(header+0x1fa, video_mode); > + } > + > + /* loader type */ > + /* High nybble = B reserved for QEMU; low nybble is revision number. > + If this code is substantially changed, you may want to consider > + incrementing the revision. 
*/ > + if (protocol >= 0x200) { > + header[0x210] = 0xB0; > + } > + /* heap */ > + if (protocol >= 0x201) { > + header[0x211] |= 0x80; /* CAN_USE_HEAP */ > + stw_p(header+0x224, cmdline_addr-real_addr-0x200); > + } > + > + /* load initrd */ > + if (initrd_filename) { > + GMappedFile *mapped_file; > + gsize initrd_size; > + gchar *initrd_data; > + GError *gerr = NULL; > + > + if (protocol < 0x200) { > + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); > + exit(1); > + } > + > + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); > + if (!mapped_file) { > + fprintf(stderr, "qemu: error reading initrd %s: %s\n", > + initrd_filename, gerr->message); > + exit(1); > + } > + x86ms->initrd_mapped_file = mapped_file; > + > + initrd_data = g_mapped_file_get_contents(mapped_file); > + initrd_size = g_mapped_file_get_length(mapped_file); > + if (initrd_size >= initrd_max) { > + fprintf(stderr, "qemu: initrd is too large, cannot support." > + "(max: %"PRIu32", need %"PRId64")\n", > + initrd_max, (uint64_t)initrd_size); > + exit(1); > + } > + > + initrd_addr = (initrd_max-initrd_size) & ~4095; > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); > + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); > + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); > + > + stl_p(header+0x218, initrd_addr); > + stl_p(header+0x21c, initrd_size); > + } > + > + /* load kernel and setup */ > + setup_size = header[0x1f1]; > + if (setup_size == 0) { > + setup_size = 4; > + } > + setup_size = (setup_size+1)*512; > + if (setup_size > kernel_size) { > + fprintf(stderr, "qemu: invalid kernel header\n"); > + exit(1); > + } > + kernel_size -= setup_size; > + > + setup = g_malloc(setup_size); > + kernel = g_malloc(kernel_size); > + fseek(f, 0, SEEK_SET); > + if (fread(setup, 1, setup_size, f) != setup_size) { > + fprintf(stderr, "fread() failed\n"); > + exit(1); > + } > + if (fread(kernel, 1, kernel_size, f) != kernel_size) { > + 
fprintf(stderr, "fread() failed\n"); > + exit(1); > + } > + fclose(f); > + > + /* append dtb to kernel */ > + if (dtb_filename) { > + if (protocol < 0x209) { > + fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); > + exit(1); > + } > + > + dtb_size = get_image_size(dtb_filename); > + if (dtb_size <= 0) { > + fprintf(stderr, "qemu: error reading dtb %s: %s\n", > + dtb_filename, strerror(errno)); > + exit(1); > + } > + > + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); > + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; > + kernel = g_realloc(kernel, kernel_size); > + > + stq_p(header+0x250, prot_addr + setup_data_offset); > + > + setup_data = (struct setup_data *)(kernel + setup_data_offset); > + setup_data->next = 0; > + setup_data->type = cpu_to_le32(SETUP_DTB); > + setup_data->len = cpu_to_le32(dtb_size); > + > + load_image_size(dtb_filename, setup_data->data, dtb_size); > + } > + > + memcpy(setup, header, MIN(sizeof(header), setup_size)); > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); > + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); > + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); > + > + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); > + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); > + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); > + > + option_rom[nb_option_roms].bootindex = 0; > + option_rom[nb_option_roms].name = "linuxboot.bin"; > + if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { > + option_rom[nb_option_roms].name = "linuxboot_dma.bin"; > + } > + nb_option_roms++; > +} > + > +static void x86_machine_get_max_ram_below_4g(Object *obj, Visitor *v, > + const char *name, void *opaque, > + Error **errp) > +{ > + X86MachineState *x86ms = X86_MACHINE(obj); > + uint64_t value = x86ms->max_ram_below_4g; > + > + visit_type_size(v, name, &value, errp); > +} > + > +static void x86_machine_set_max_ram_below_4g(Object *obj, Visitor *v, 
> + const char *name, void *opaque, > + Error **errp) > +{ > + X86MachineState *x86ms = X86_MACHINE(obj); > + Error *error = NULL; > + uint64_t value; > + > + visit_type_size(v, name, &value, &error); > + if (error) { > + error_propagate(errp, error); > + return; > + } > + if (value > 4 * GiB) { > + error_setg(&error, > + "Machine option 'max-ram-below-4g=%"PRIu64 > + "' expects size less than or equal to 4G", value); > + error_propagate(errp, error); > + return; > + } > + > + if (value < 1 * MiB) { > + warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," > + "BIOS may not work with less than 1MiB", value); > + } > + > + x86ms->max_ram_below_4g = value; > +} > + > +static void x86_machine_initfn(Object *obj) > +{ > + X86MachineState *x86ms = X86_MACHINE(obj); > + > + x86ms->max_ram_below_4g = 0; /* use default */ > + x86ms->smp_dies = 1; > +} > + > +static void x86_machine_class_init(ObjectClass *oc, void *data) > +{ > + MachineClass *mc = MACHINE_CLASS(oc); > + > + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; > + mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; > + mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; > + > + object_class_property_add(oc, X86_MACHINE_MAX_RAM_BELOW_4G, "size", > + x86_machine_get_max_ram_below_4g, x86_machine_set_max_ram_below_4g, > + NULL, NULL, &error_abort); > + > + object_class_property_set_description(oc, X86_MACHINE_MAX_RAM_BELOW_4G, > + "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); > +} > + > +static const TypeInfo x86_machine_info = { > + .name = TYPE_X86_MACHINE, > + .parent = TYPE_MACHINE, > + .abstract = true, > + .instance_size = sizeof(X86MachineState), > + .instance_init = x86_machine_initfn, > + .class_size = sizeof(X86MachineClass), > + .class_init = x86_machine_class_init, > +}; > + > +static void x86_machine_register_types(void) > +{ > + type_register_static(&x86_machine_info); > +} > + > +type_init(x86_machine_register_types) > diff --git 
a/hw/intc/ioapic.c b/hw/intc/ioapic.c > index 1ede055387..e621dde6c3 100644 > --- a/hw/intc/ioapic.c > +++ b/hw/intc/ioapic.c > @@ -23,6 +23,7 @@ > #include "qemu/osdep.h" > #include "qapi/error.h" > #include "monitor/monitor.h" > +#include "hw/i386/x86.h" > #include "hw/i386/pc.h" > #include "hw/i386/apic.h" > #include "hw/i386/ioapic.h" > @@ -89,7 +90,7 @@ static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info) > > static void ioapic_service(IOAPICCommonState *s) > { > - AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as; > + AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as; > struct ioapic_entry_info info; > uint8_t i; > uint32_t mask; > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 062feeb69e..de28d55e5c 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -3,6 +3,7 @@ > > #include "exec/memory.h" > #include "hw/boards.h" > +#include "hw/i386/x86.h" > #include "hw/isa/isa.h" > #include "hw/block/fdc.h" > #include "hw/block/flash.h" > @@ -27,7 +28,7 @@ > */ > struct PCMachineState { > /*< private >*/ > - MachineState parent_obj; > + X86MachineState parent_obj; > > /* <public> */ > > @@ -36,15 +37,10 @@ struct PCMachineState { > > /* Pointers to devices and objects: */ > HotplugHandler *acpi_dev; > - ISADevice *rtc; > PCIBus *bus; > - FWCfgState *fw_cfg; > - qemu_irq *gsi; > PFlashCFI01 *flash[2]; > - GMappedFile *initrd_mapped_file; > > /* Configuration options: */ > - uint64_t max_ram_below_4g; > OnOffAuto vmport; > OnOffAuto smm; > > @@ -53,27 +49,13 @@ struct PCMachineState { > bool sata_enabled; > bool pit_enabled; > > - /* RAM information (sizes, addresses, configuration): */ > - ram_addr_t below_4g_mem_size, above_4g_mem_size; > - > - /* CPU and apic information: */ > - bool apic_xrupt_override; > - unsigned apic_id_limit; > - uint16_t boot_cpus; > - unsigned smp_dies; > - > /* NUMA information: */ > uint64_t numa_nodes; > uint64_t *node_mem; > - > - /* Address 
space used by IOAPIC device. All IOAPIC interrupts > - * will be translated to MSI messages in the address space. */ > - AddressSpace *ioapic_as; > }; > > #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" > #define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size" > -#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" > #define PC_MACHINE_VMPORT "vmport" > #define PC_MACHINE_SMM "smm" > #define PC_MACHINE_SMBUS "smbus" > @@ -139,9 +121,6 @@ typedef struct PCMachineClass { > > /* use PVH to load kernels that support this feature */ > bool pvh_enabled; > - > - /* Enables contiguous-apic-ID mode */ > - bool compat_apic_id_mode; > } PCMachineClass; > > #define TYPE_PC_MACHINE "generic-pc-machine" > @@ -193,10 +172,6 @@ bool pc_machine_is_smm_enabled(PCMachineState *pcms); > void pc_register_ferr_irq(qemu_irq irq); > void pc_acpi_smi_interrupt(void *opaque, int irq, int level); > > -void pc_cpus_init(PCMachineState *pcms); > -void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp); > -void pc_smp_parse(MachineState *ms, QemuOpts *opts); > - > void pc_guest_info_init(PCMachineState *pcms); > > #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" > diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h > new file mode 100644 > index 0000000000..5980090b29 > --- /dev/null > +++ b/include/hw/i386/x86.h > @@ -0,0 +1,97 @@ > +/* > + * Copyright (c) 2019 Red Hat, Inc. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2 or later, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. 
> + * > + * You should have received a copy of the GNU General Public License along with > + * this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef HW_I386_X86_H > +#define HW_I386_X86_H > + > +#include "qemu-common.h" > +#include "exec/hwaddr.h" > +#include "qemu/notify.h" > + > +#include "hw/boards.h" > +#include "hw/nmi.h" > + > +typedef struct { > + /*< private >*/ > + MachineClass parent; > + > + /*< public >*/ > + > + /* Enables contiguous-apic-ID mode */ > + bool compat_apic_id_mode; > +} X86MachineClass; > + > +typedef struct { > + /*< private >*/ > + MachineState parent; > + > + /*< public >*/ > + > + /* Pointers to devices and objects: */ > + ISADevice *rtc; > + FWCfgState *fw_cfg; > + qemu_irq *gsi; > + GMappedFile *initrd_mapped_file; > + > + /* Configuration options: */ > + uint64_t max_ram_below_4g; > + > + /* RAM information (sizes, addresses, configuration): */ > + ram_addr_t below_4g_mem_size, above_4g_mem_size; > + > + /* CPU and apic information: */ > + bool apic_xrupt_override; > + unsigned apic_id_limit; > + uint16_t boot_cpus; > + unsigned smp_dies; > + > + /* Address space used by IOAPIC device. All IOAPIC interrupts > + * will be translated to MSI messages in the address space. 
*/ > + AddressSpace *ioapic_as; > +} X86MachineState; > + > +#define X86_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" > + > +#define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") > +#define X86_MACHINE(obj) \ > + OBJECT_CHECK(X86MachineState, (obj), TYPE_X86_MACHINE) > +#define X86_MACHINE_GET_CLASS(obj) \ > + OBJECT_GET_CLASS(X86MachineClass, obj, TYPE_X86_MACHINE) > +#define X86_MACHINE_CLASS(class) \ > + OBJECT_CLASS_CHECK(X86MachineClass, class, TYPE_X86_MACHINE) > + > +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, > + unsigned int cpu_index); > + > +void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); > +void x86_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp); > +void x86_smp_parse(MachineState *ms, QemuOpts *opts); > +void x86_nmi(NMIState *n, int cpu_index, Error **errp); > + > +CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, > + unsigned cpu_index); > +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); > +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); > + > +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw); > + > +void load_linux(X86MachineState *x86ms, Maybe rename this to x86_load_linux()? > + FWCfgState *fw_cfg, > + unsigned acpi_data_size, > + bool linuxboot_dma_enabled, > + bool pvh_enabled); > + > +#endif > Patch looks good, however I'd split it as: 1/ rename functions x86_* 2/ export functions, add "hw/i386/x86.h" 3/ move functions to hw/i386/x86.c 4/ add/use X86MachineState Anyhow, if the maintainer is happy with it as is: Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
On 9/24/19 3:40 PM, Philippe Mathieu-Daudé wrote: > On 9/24/19 2:44 PM, Sergio Lopez wrote: >> Split up PCMachineState and PCMachineClass and derive X86MachineState >> and X86MachineClass from them. This allows sharing code with non-PC >> machine types. >> >> Also, move shared functions from pc.c to x86.c. >> >> Signed-off-by: Sergio Lopez <slp@redhat.com> >> --- >> hw/acpi/cpu_hotplug.c | 10 +- >> hw/i386/Makefile.objs | 1 + >> hw/i386/acpi-build.c | 31 +- >> hw/i386/amd_iommu.c | 4 +- >> hw/i386/intel_iommu.c | 4 +- >> hw/i386/pc.c | 796 +++++------------------------------------- >> hw/i386/pc_piix.c | 48 +-- >> hw/i386/pc_q35.c | 38 +- >> hw/i386/pc_sysfw.c | 60 +--- >> hw/i386/x86.c | 788 +++++++++++++++++++++++++++++++++++++++++ >> hw/intc/ioapic.c | 3 +- >> include/hw/i386/pc.h | 29 +- >> include/hw/i386/x86.h | 97 +++++ >> 13 files changed, 1045 insertions(+), 864 deletions(-) >> create mode 100644 hw/i386/x86.c >> create mode 100644 include/hw/i386/x86.h >> >> diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c >> index 6e8293aac9..3ac2045a95 100644 >> --- a/hw/acpi/cpu_hotplug.c >> +++ b/hw/acpi/cpu_hotplug.c >> @@ -128,7 +128,7 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, >> Aml *one = aml_int(1); >> MachineClass *mc = MACHINE_GET_CLASS(machine); >> const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); >> - PCMachineState *pcms = PC_MACHINE(machine); >> + X86MachineState *x86ms = X86_MACHINE(machine); >> >> /* >> * _MAT method - creates an madt apic buffer >> @@ -236,9 +236,9 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, >> /* The current AML generator can cover the APIC ID range [0..255], >> * inclusive, for VCPU hotplug. */ >> QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256); >> - if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { >> + if (x86ms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { >> error_report("max_cpus is too large. 
APIC ID of last CPU is %u", >> - pcms->apic_id_limit - 1); >> + x86ms->apic_id_limit - 1); >> exit(1); >> } >> >> @@ -315,8 +315,8 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, >> * ith up to 255 elements. Windows guests up to win2k8 fail when >> * VarPackageOp is used. >> */ >> - pkg = pcms->apic_id_limit <= 255 ? aml_package(pcms->apic_id_limit) : >> - aml_varpackage(pcms->apic_id_limit); >> + pkg = x86ms->apic_id_limit <= 255 ? aml_package(x86ms->apic_id_limit) : >> + aml_varpackage(x86ms->apic_id_limit); >> >> for (i = 0, apic_idx = 0; i < apic_ids->len; i++) { >> int apic_id = apic_ids->cpus[i].arch_id; >> diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs >> index 149712db07..5b4b3a672e 100644 >> --- a/hw/i386/Makefile.objs >> +++ b/hw/i386/Makefile.objs >> @@ -1,6 +1,7 @@ >> obj-$(CONFIG_KVM) += kvm/ >> obj-y += multiboot.o >> obj-y += pvh.o >> +obj-y += x86.o >> obj-y += pc.o >> obj-y += e820.o >> obj-$(CONFIG_I440FX) += pc_piix.o >> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c >> index e54e571a75..76e18d3285 100644 >> --- a/hw/i386/acpi-build.c >> +++ b/hw/i386/acpi-build.c >> @@ -29,6 +29,7 @@ >> #include "hw/pci/pci.h" >> #include "hw/core/cpu.h" >> #include "target/i386/cpu.h" >> +#include "hw/i386/x86.h" >> #include "hw/misc/pvpanic.h" >> #include "hw/timer/hpet.h" >> #include "hw/acpi/acpi-defs.h" >> @@ -361,6 +362,7 @@ static void >> build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) >> { >> MachineClass *mc = MACHINE_GET_CLASS(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); >> int madt_start = table_data->len; >> AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); >> @@ -390,7 +392,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) >> io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); >> io_apic->interrupt = cpu_to_le32(0); >> >> - if 
(pcms->apic_xrupt_override) { >> + if (x86ms->apic_xrupt_override) { >> intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); >> intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; >> intsrcovr->length = sizeof(*intsrcovr); >> @@ -1817,8 +1819,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, >> CrsRangeEntry *entry; >> Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; >> CrsRangeSet crs_range_set; >> - PCMachineState *pcms = PC_MACHINE(machine); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); >> + X86MachineState *x86ms = X86_MACHINE(machine); >> AcpiMcfgInfo mcfg; >> uint32_t nr_mem = machine->ram_slots; >> int root_bus_limit = 0xFF; >> @@ -2083,7 +2085,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, >> * with half of the 16-bit control register. Hence, the total size >> * of the i/o region used is FW_CFG_CTL_SIZE; when using DMA, the >> * DMA control register is located at FW_CFG_DMA_IO_BASE + 4 */ >> - uint8_t io_size = object_property_get_bool(OBJECT(pcms->fw_cfg), >> + uint8_t io_size = object_property_get_bool(OBJECT(x86ms->fw_cfg), >> "dma_enabled", NULL) ? 
>> ROUND_UP(FW_CFG_CTL_SIZE, 4) + sizeof(dma_addr_t) : >> FW_CFG_CTL_SIZE; >> @@ -2318,6 +2320,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) >> MachineClass *mc = MACHINE_GET_CLASS(machine); >> const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); >> PCMachineState *pcms = PC_MACHINE(machine); >> + X86MachineState *x86ms = X86_MACHINE(machine); >> ram_addr_t hotplugabble_address_space_size = >> object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, >> NULL); >> @@ -2386,16 +2389,16 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) >> } >> >> /* Cut out the ACPI_PCI hole */ >> - if (mem_base <= pcms->below_4g_mem_size && >> - next_base > pcms->below_4g_mem_size) { >> - mem_len -= next_base - pcms->below_4g_mem_size; >> + if (mem_base <= x86ms->below_4g_mem_size && >> + next_base > x86ms->below_4g_mem_size) { >> + mem_len -= next_base - x86ms->below_4g_mem_size; >> if (mem_len > 0) { >> numamem = acpi_data_push(table_data, sizeof *numamem); >> build_srat_memory(numamem, mem_base, mem_len, i - 1, >> MEM_AFFINITY_ENABLED); >> } >> mem_base = 1ULL << 32; >> - mem_len = next_base - pcms->below_4g_mem_size; >> + mem_len = next_base - x86ms->below_4g_mem_size; >> next_base = mem_base + mem_len; >> } >> >> @@ -2614,6 +2617,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) >> { >> PCMachineState *pcms = PC_MACHINE(machine); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + X86MachineState *x86ms = X86_MACHINE(machine); >> GArray *table_offsets; >> unsigned facs, dsdt, rsdt, fadt; >> AcpiPmInfo pm; >> @@ -2775,7 +2779,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) >> */ >> int legacy_aml_len = >> pcmc->legacy_acpi_table_size + >> - ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit; >> + ACPI_BUILD_LEGACY_CPU_AML_SIZE * x86ms->apic_id_limit; >> int legacy_table_size = >> ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, >> 
ACPI_BUILD_ALIGN_SIZE); >> @@ -2865,13 +2869,14 @@ void acpi_setup(void) >> { >> PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> AcpiBuildTables tables; >> AcpiBuildState *build_state; >> Object *vmgenid_dev; >> TPMIf *tpm; >> static FwCfgTPMConfig tpm_config; >> >> - if (!pcms->fw_cfg) { >> + if (!x86ms->fw_cfg) { >> ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); >> return; >> } >> @@ -2902,7 +2907,7 @@ void acpi_setup(void) >> acpi_add_rom_blob(acpi_build_update, build_state, >> tables.linker->cmd_blob, "etc/table-loader", 0); >> >> - fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, >> + fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, >> tables.tcpalog->data, acpi_data_len(tables.tcpalog)); >> >> tpm = tpm_find(); >> @@ -2912,13 +2917,13 @@ void acpi_setup(void) >> .tpm_version = tpm_get_version(tpm), >> .tpmppi_version = TPM_PPI_VERSION_1_30 >> }; >> - fw_cfg_add_file(pcms->fw_cfg, "etc/tpm/config", >> + fw_cfg_add_file(x86ms->fw_cfg, "etc/tpm/config", >> &tpm_config, sizeof tpm_config); >> } >> >> vmgenid_dev = find_vmgenid_dev(); >> if (vmgenid_dev) { >> - vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg, >> + vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), x86ms->fw_cfg, >> tables.vmgenid); >> } >> >> @@ -2931,7 +2936,7 @@ void acpi_setup(void) >> uint32_t rsdp_size = acpi_data_len(tables.rsdp); >> >> build_state->rsdp = g_memdup(tables.rsdp->data, rsdp_size); >> - fw_cfg_add_file_callback(pcms->fw_cfg, ACPI_BUILD_RSDP_FILE, >> + fw_cfg_add_file_callback(x86ms->fw_cfg, ACPI_BUILD_RSDP_FILE, >> acpi_build_update, NULL, build_state, >> build_state->rsdp, rsdp_size, true); >> build_state->rsdp_mr = NULL; >> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c >> index 08884523e2..bb3b5b4563 100644 >> --- a/hw/i386/amd_iommu.c >> +++ b/hw/i386/amd_iommu.c >> @@ -21,6 +21,7 @@ >> */ >> >> #include "qemu/osdep.h" >> +#include 
"hw/i386/x86.h" >> #include "hw/i386/pc.h" >> #include "hw/pci/msi.h" >> #include "hw/pci/pci_bus.h" >> @@ -1537,6 +1538,7 @@ static void amdvi_realize(DeviceState *dev, Error **err) >> X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); >> MachineState *ms = MACHINE(qdev_get_machine()); >> PCMachineState *pcms = PC_MACHINE(ms); >> + X86MachineState *x86ms = X86_MACHINE(ms); >> PCIBus *bus = pcms->bus; >> >> s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, >> @@ -1565,7 +1567,7 @@ static void amdvi_realize(DeviceState *dev, Error **err) >> } >> >> /* Pseudo address space under root PCI bus. */ >> - pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); >> + x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); >> >> /* set up MMIO */ >> memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 75ca6f9c70..21f091c654 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -29,6 +29,7 @@ >> #include "hw/pci/pci.h" >> #include "hw/pci/pci_bus.h" >> #include "hw/qdev-properties.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/pc.h" >> #include "hw/i386/apic-msidef.h" >> #include "hw/boards.h" >> @@ -3703,6 +3704,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) >> { >> MachineState *ms = MACHINE(qdev_get_machine()); >> PCMachineState *pcms = PC_MACHINE(ms); >> + X86MachineState *x86ms = X86_MACHINE(ms); >> PCIBus *bus = pcms->bus; >> IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); >> X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); >> @@ -3743,7 +3745,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) >> sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); >> pci_setup_iommu(bus, vtd_host_dma_iommu, dev); >> /* Pseudo address space under root PCI bus. 
*/ >> - pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); >> + x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); >> } >> >> static void vtd_class_init(ObjectClass *klass, void *data) >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index 3920aa7e85..d18b461f01 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -24,6 +24,7 @@ >> >> #include "qemu/osdep.h" >> #include "qemu/units.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/pc.h" >> #include "hw/char/serial.h" >> #include "hw/char/parallel.h" >> @@ -676,6 +677,7 @@ void pc_cmos_init(PCMachineState *pcms, >> BusState *idebus0, BusState *idebus1, >> ISADevice *s) >> { >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> int val; >> static pc_cmos_init_late_arg arg; >> >> @@ -683,12 +685,12 @@ void pc_cmos_init(PCMachineState *pcms, >> >> /* memory size */ >> /* base memory (first MiB) */ >> - val = MIN(pcms->below_4g_mem_size / KiB, 640); >> + val = MIN(x86ms->below_4g_mem_size / KiB, 640); >> rtc_set_memory(s, 0x15, val); >> rtc_set_memory(s, 0x16, val >> 8); >> /* extended memory (next 64MiB) */ >> - if (pcms->below_4g_mem_size > 1 * MiB) { >> - val = (pcms->below_4g_mem_size - 1 * MiB) / KiB; >> + if (x86ms->below_4g_mem_size > 1 * MiB) { >> + val = (x86ms->below_4g_mem_size - 1 * MiB) / KiB; >> } else { >> val = 0; >> } >> @@ -699,8 +701,8 @@ void pc_cmos_init(PCMachineState *pcms, >> rtc_set_memory(s, 0x30, val); >> rtc_set_memory(s, 0x31, val >> 8); >> /* memory between 16MiB and 4GiB */ >> - if (pcms->below_4g_mem_size > 16 * MiB) { >> - val = (pcms->below_4g_mem_size - 16 * MiB) / (64 * KiB); >> + if (x86ms->below_4g_mem_size > 16 * MiB) { >> + val = (x86ms->below_4g_mem_size - 16 * MiB) / (64 * KiB); >> } else { >> val = 0; >> } >> @@ -709,20 +711,20 @@ void pc_cmos_init(PCMachineState *pcms, >> rtc_set_memory(s, 0x34, val); >> rtc_set_memory(s, 0x35, val >> 8); >> /* memory above 4GiB */ >> - val = pcms->above_4g_mem_size / 65536; >> + val = 
x86ms->above_4g_mem_size / 65536; >> rtc_set_memory(s, 0x5b, val); >> rtc_set_memory(s, 0x5c, val >> 8); >> rtc_set_memory(s, 0x5d, val >> 16); >> >> - object_property_add_link(OBJECT(pcms), "rtc_state", >> + object_property_add_link(OBJECT(x86ms), "rtc_state", >> TYPE_ISA_DEVICE, >> - (Object **)&pcms->rtc, >> + (Object **)&x86ms->rtc, >> object_property_allow_set_link, >> OBJ_PROP_LINK_STRONG, &error_abort); >> - object_property_set_link(OBJECT(pcms), OBJECT(s), >> + object_property_set_link(OBJECT(x86ms), OBJECT(s), >> "rtc_state", &error_abort); >> >> - set_boot_dev(s, MACHINE(pcms)->boot_order, &error_fatal); >> + set_boot_dev(s, MACHINE(x86ms)->boot_order, &error_fatal); >> >> val = 0; >> val |= 0x02; /* FPU is there */ >> @@ -863,35 +865,6 @@ static void handle_a20_line_change(void *opaque, int irq, int level) >> x86_cpu_set_a20(cpu, level); >> } >> >> -/* Calculates initial APIC ID for a specific CPU index >> - * >> - * Currently we need to be able to calculate the APIC ID from the CPU index >> - * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have >> - * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of >> - * all CPUs up to max_cpus. 
>> - */ >> -static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, >> - unsigned int cpu_index) >> -{ >> - MachineState *ms = MACHINE(pcms); >> - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> - uint32_t correct_id; >> - static bool warned; >> - >> - correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores, >> - ms->smp.threads, cpu_index); >> - if (pcmc->compat_apic_id_mode) { >> - if (cpu_index != correct_id && !warned && !qtest_enabled()) { >> - error_report("APIC IDs set in compatibility mode, " >> - "CPU topology won't match the configuration"); >> - warned = true; >> - } >> - return cpu_index; >> - } else { >> - return correct_id; >> - } >> -} >> - >> static void pc_build_smbios(PCMachineState *pcms) >> { >> uint8_t *smbios_tables, *smbios_anchor; >> @@ -899,6 +872,7 @@ static void pc_build_smbios(PCMachineState *pcms) >> struct smbios_phys_mem_area *mem_array; >> unsigned i, array_count; >> MachineState *ms = MACHINE(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); >> >> /* tell smbios about cpuid version and features */ >> @@ -906,7 +880,7 @@ static void pc_build_smbios(PCMachineState *pcms) >> >> smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); >> if (smbios_tables) { >> - fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, >> + fw_cfg_add_bytes(x86ms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, >> smbios_tables, smbios_tables_len); >> } >> >> @@ -927,9 +901,9 @@ static void pc_build_smbios(PCMachineState *pcms) >> g_free(mem_array); >> >> if (smbios_anchor) { >> - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-tables", >> + fw_cfg_add_file(x86ms->fw_cfg, "etc/smbios/smbios-tables", >> smbios_tables, smbios_tables_len); >> - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-anchor", >> + fw_cfg_add_file(x86ms->fw_cfg, "etc/smbios/smbios-anchor", >> smbios_anchor, smbios_anchor_len); >> } >> } >> @@ -942,10 +916,11 @@ static FWCfgState 
*bochs_bios_init(AddressSpace *as, PCMachineState *pcms) >> const CPUArchIdList *cpus; >> MachineClass *mc = MACHINE_GET_CLASS(pcms); >> MachineState *ms = MACHINE(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> int nb_numa_nodes = ms->numa_state->num_nodes; >> >> fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); >> - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); >> + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); >> >> /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: >> * >> @@ -959,7 +934,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) >> * So for compatibility reasons with old BIOSes we are stuck with >> * "etc/max-cpus" actually being apic_id_limit >> */ >> - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit); >> + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)x86ms->apic_id_limit); >> fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); >> fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, >> acpi_tables, acpi_tables_len); >> @@ -972,374 +947,25 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) >> * of nodes, one word for each VCPU->node and one word for each node to >> * hold the amount of memory. 
>> */ >> - numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); >> + numa_fw_cfg = g_new0(uint64_t, 1 + x86ms->apic_id_limit + nb_numa_nodes); >> numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); >> cpus = mc->possible_cpu_arch_ids(MACHINE(pcms)); >> for (i = 0; i < cpus->len; i++) { >> unsigned int apic_id = cpus->cpus[i].arch_id; >> - assert(apic_id < pcms->apic_id_limit); >> + assert(apic_id < x86ms->apic_id_limit); >> numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); >> } >> for (i = 0; i < nb_numa_nodes; i++) { >> - numa_fw_cfg[pcms->apic_id_limit + 1 + i] = >> + numa_fw_cfg[x86ms->apic_id_limit + 1 + i] = >> cpu_to_le64(ms->numa_state->nodes[i].node_mem); >> } >> fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, >> - (1 + pcms->apic_id_limit + nb_numa_nodes) * >> + (1 + x86ms->apic_id_limit + nb_numa_nodes) * >> sizeof(*numa_fw_cfg)); >> >> return fw_cfg; >> } >> >> -static long get_file_size(FILE *f) >> -{ >> - long where, size; >> - >> - /* XXX: on Unix systems, using fstat() probably makes more sense */ >> - >> - where = ftell(f); >> - fseek(f, 0, SEEK_END); >> - size = ftell(f); >> - fseek(f, where, SEEK_SET); >> - >> - return size; >> -} >> - >> -struct setup_data { >> - uint64_t next; >> - uint32_t type; >> - uint32_t len; >> - uint8_t data[0]; >> -} __attribute__((packed)); >> - >> -static void load_linux(PCMachineState *pcms, >> - FWCfgState *fw_cfg) >> -{ >> - uint16_t protocol; >> - int setup_size, kernel_size, cmdline_size; >> - int dtb_size, setup_data_offset; >> - uint32_t initrd_max; >> - uint8_t header[8192], *setup, *kernel; >> - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; >> - FILE *f; >> - char *vmode; >> - MachineState *machine = MACHINE(pcms); >> - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> - struct setup_data *setup_data; >> - const char *kernel_filename = machine->kernel_filename; >> - const char *initrd_filename = machine->initrd_filename; >> - const char *dtb_filename = 
machine->dtb; >> - const char *kernel_cmdline = machine->kernel_cmdline; >> - >> - /* Align to 16 bytes as a paranoia measure */ >> - cmdline_size = (strlen(kernel_cmdline)+16) & ~15; >> - >> - /* load the kernel header */ >> - f = fopen(kernel_filename, "rb"); >> - if (!f || !(kernel_size = get_file_size(f)) || >> - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != >> - MIN(ARRAY_SIZE(header), kernel_size)) { >> - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", >> - kernel_filename, strerror(errno)); >> - exit(1); >> - } >> - >> - /* kernel protocol version */ >> -#if 0 >> - fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); >> -#endif >> - if (ldl_p(header+0x202) == 0x53726448) { >> - protocol = lduw_p(header+0x206); >> - } else { >> - size_t pvh_start_addr; >> - uint32_t mh_load_addr = 0; >> - uint32_t elf_kernel_size = 0; >> - /* >> - * This could be a multiboot kernel. If it is, let's stop treating it >> - * like a Linux kernel. >> - * Note: some multiboot images could be in the ELF format (the same of >> - * PVH), so we try multiboot first since we check the multiboot magic >> - * header before to load it. >> - */ >> - if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, >> - kernel_cmdline, kernel_size, header)) { >> - return; >> - } >> - /* >> - * Check if the file is an uncompressed kernel file (ELF) and load it, >> - * saving the PVH entry point used by the x86/HVM direct boot ABI. >> - * If load_elfboot() is successful, populate the fw_cfg info. 
>> - */ >> - if (pcmc->pvh_enabled && >> - pvh_load_elfboot(kernel_filename, >> - &mh_load_addr, &elf_kernel_size)) { >> - fclose(f); >> - >> - pvh_start_addr = pvh_get_start_addr(); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, >> - strlen(kernel_cmdline) + 1); >> - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, >> - header, sizeof(header)); >> - >> - /* load initrd */ >> - if (initrd_filename) { >> - GMappedFile *mapped_file; >> - gsize initrd_size; >> - gchar *initrd_data; >> - GError *gerr = NULL; >> - >> - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> - if (!mapped_file) { >> - fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> - initrd_filename, gerr->message); >> - exit(1); >> - } >> - pcms->initrd_mapped_file = mapped_file; >> - >> - initrd_data = g_mapped_file_get_contents(mapped_file); >> - initrd_size = g_mapped_file_get_length(mapped_file); >> - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; >> - if (initrd_size >= initrd_max) { >> - fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> - "(max: %"PRIu32", need %"PRId64")\n", >> - initrd_max, (uint64_t)initrd_size); >> - exit(1); >> - } >> - >> - initrd_addr = (initrd_max - initrd_size) & ~4095; >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, >> - initrd_size); >> - } >> - >> - option_rom[nb_option_roms].bootindex = 0; >> - option_rom[nb_option_roms].name = "pvh.bin"; >> - nb_option_roms++; >> - >> - return; >> - } >> - protocol = 0; >> - } >> - >> - if (protocol < 0x200 || !(header[0x211] & 0x01)) { >> - /* Low kernel */ >> - real_addr = 0x90000; >> - cmdline_addr = 0x9a000 - cmdline_size; >> - prot_addr = 0x10000; >> - } else if (protocol < 0x202) { >> - /* High but ancient kernel */ >> - real_addr = 0x90000; >> - cmdline_addr = 0x9a000 - cmdline_size; >> - prot_addr = 0x100000; >> - } else { >> - /* High and recent kernel */ >> - real_addr = 0x10000; >> - cmdline_addr = 0x20000; >> - prot_addr = 0x100000; >> - } >> - >> -#if 0 >> - fprintf(stderr, >> - "qemu: real_addr = 0x" TARGET_FMT_plx "\n" >> - "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" >> - "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", >> - real_addr, >> - cmdline_addr, >> - prot_addr); >> -#endif >> - >> - /* highest address for loading the initrd */ >> - if (protocol >= 0x20c && >> - lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { >> - /* >> - * Linux has supported initrd up to 4 GB for a very long time (2007, >> - * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), >> - * though it only sets initrd_max to 2 GB to "work around bootloader >> - * bugs". Luckily, QEMU firmware(which does something like bootloader) >> - * has supported this. >> - * >> - * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can >> - * be loaded into any address. 
>> - * >> - * In addition, initrd_max is uint32_t simply because QEMU doesn't >> - * support the 64-bit boot protocol (specifically the ext_ramdisk_image >> - * field). >> - * >> - * Therefore here just limit initrd_max to UINT32_MAX simply as well. >> - */ >> - initrd_max = UINT32_MAX; >> - } else if (protocol >= 0x203) { >> - initrd_max = ldl_p(header+0x22c); >> - } else { >> - initrd_max = 0x37ffffff; >> - } >> - >> - if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) { >> - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; >> - } >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); >> - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> - >> - if (protocol >= 0x202) { >> - stl_p(header+0x228, cmdline_addr); >> - } else { >> - stw_p(header+0x20, 0xA33F); >> - stw_p(header+0x22, cmdline_addr-real_addr); >> - } >> - >> - /* handle vga= parameter */ >> - vmode = strstr(kernel_cmdline, "vga="); >> - if (vmode) { >> - unsigned int video_mode; >> - /* skip "vga=" */ >> - vmode += 4; >> - if (!strncmp(vmode, "normal", 6)) { >> - video_mode = 0xffff; >> - } else if (!strncmp(vmode, "ext", 3)) { >> - video_mode = 0xfffe; >> - } else if (!strncmp(vmode, "ask", 3)) { >> - video_mode = 0xfffd; >> - } else { >> - video_mode = strtol(vmode, NULL, 0); >> - } >> - stw_p(header+0x1fa, video_mode); >> - } >> - >> - /* loader type */ >> - /* High nybble = B reserved for QEMU; low nybble is revision number. >> - If this code is substantially changed, you may want to consider >> - incrementing the revision. 
*/ >> - if (protocol >= 0x200) { >> - header[0x210] = 0xB0; >> - } >> - /* heap */ >> - if (protocol >= 0x201) { >> - header[0x211] |= 0x80; /* CAN_USE_HEAP */ >> - stw_p(header+0x224, cmdline_addr-real_addr-0x200); >> - } >> - >> - /* load initrd */ >> - if (initrd_filename) { >> - GMappedFile *mapped_file; >> - gsize initrd_size; >> - gchar *initrd_data; >> - GError *gerr = NULL; >> - >> - if (protocol < 0x200) { >> - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); >> - exit(1); >> - } >> - >> - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> - if (!mapped_file) { >> - fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> - initrd_filename, gerr->message); >> - exit(1); >> - } >> - pcms->initrd_mapped_file = mapped_file; >> - >> - initrd_data = g_mapped_file_get_contents(mapped_file); >> - initrd_size = g_mapped_file_get_length(mapped_file); >> - if (initrd_size >= initrd_max) { >> - fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> - "(max: %"PRIu32", need %"PRId64")\n", >> - initrd_max, (uint64_t)initrd_size); >> - exit(1); >> - } >> - >> - initrd_addr = (initrd_max-initrd_size) & ~4095; >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); >> - >> - stl_p(header+0x218, initrd_addr); >> - stl_p(header+0x21c, initrd_size); >> - } >> - >> - /* load kernel and setup */ >> - setup_size = header[0x1f1]; >> - if (setup_size == 0) { >> - setup_size = 4; >> - } >> - setup_size = (setup_size+1)*512; >> - if (setup_size > kernel_size) { >> - fprintf(stderr, "qemu: invalid kernel header\n"); >> - exit(1); >> - } >> - kernel_size -= setup_size; >> - >> - setup = g_malloc(setup_size); >> - kernel = g_malloc(kernel_size); >> - fseek(f, 0, SEEK_SET); >> - if (fread(setup, 1, setup_size, f) != setup_size) { >> - fprintf(stderr, "fread() failed\n"); >> - exit(1); >> - } >> - if (fread(kernel, 1, kernel_size, f) != kernel_size) { >> - fprintf(stderr, "fread() failed\n"); >> - exit(1); >> - } >> - fclose(f); >> - >> - /* append dtb to kernel */ >> - if (dtb_filename) { >> - if (protocol < 0x209) { >> - fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); >> - exit(1); >> - } >> - >> - dtb_size = get_image_size(dtb_filename); >> - if (dtb_size <= 0) { >> - fprintf(stderr, "qemu: error reading dtb %s: %s\n", >> - dtb_filename, strerror(errno)); >> - exit(1); >> - } >> - >> - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); >> - kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; >> - kernel = g_realloc(kernel, kernel_size); >> - >> - stq_p(header+0x250, prot_addr + setup_data_offset); >> - >> - setup_data = (struct setup_data *)(kernel + setup_data_offset); >> - setup_data->next = 0; >> - setup_data->type = cpu_to_le32(SETUP_DTB); >> - setup_data->len = cpu_to_le32(dtb_size); >> - >> - load_image_size(dtb_filename, 
setup_data->data, dtb_size); >> - } >> - >> - memcpy(setup, header, MIN(sizeof(header), setup_size)); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); >> - >> - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); >> - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); >> - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); >> - >> - option_rom[nb_option_roms].bootindex = 0; >> - option_rom[nb_option_roms].name = "linuxboot.bin"; >> - if (pcmc->linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { >> - option_rom[nb_option_roms].name = "linuxboot_dma.bin"; >> - } >> - nb_option_roms++; >> -} >> - >> #define NE2000_NB_MAX 6 >> >> static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, >> @@ -1376,157 +1002,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) >> } >> } >> >> -static void pc_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp) >> -{ >> - Object *cpu = NULL; >> - Error *local_err = NULL; >> - CPUX86State *env = NULL; >> - >> - cpu = object_new(MACHINE(pcms)->cpu_type); >> - >> - env = &X86_CPU(cpu)->env; >> - env->nr_dies = pcms->smp_dies; >> - >> - object_property_set_uint(cpu, apic_id, "apic-id", &local_err); >> - object_property_set_bool(cpu, true, "realized", &local_err); >> - >> - object_unref(cpu); >> - error_propagate(errp, local_err); >> -} >> - >> -/* >> - * This function is very similar to smp_parse() >> - * in hw/core/machine.c but includes CPU die support. 
>> - */ >> -void pc_smp_parse(MachineState *ms, QemuOpts *opts) >> -{ >> - PCMachineState *pcms = PC_MACHINE(ms); >> - >> - if (opts) { >> - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); >> - unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); >> - unsigned dies = qemu_opt_get_number(opts, "dies", 1); >> - unsigned cores = qemu_opt_get_number(opts, "cores", 0); >> - unsigned threads = qemu_opt_get_number(opts, "threads", 0); >> - >> - /* compute missing values, prefer sockets over cores over threads */ >> - if (cpus == 0 || sockets == 0) { >> - cores = cores > 0 ? cores : 1; >> - threads = threads > 0 ? threads : 1; >> - if (cpus == 0) { >> - sockets = sockets > 0 ? sockets : 1; >> - cpus = cores * threads * dies * sockets; >> - } else { >> - ms->smp.max_cpus = >> - qemu_opt_get_number(opts, "maxcpus", cpus); >> - sockets = ms->smp.max_cpus / (cores * threads * dies); >> - } >> - } else if (cores == 0) { >> - threads = threads > 0 ? threads : 1; >> - cores = cpus / (sockets * dies * threads); >> - cores = cores > 0 ? cores : 1; >> - } else if (threads == 0) { >> - threads = cpus / (cores * dies * sockets); >> - threads = threads > 0 ? 
threads : 1; >> - } else if (sockets * dies * cores * threads < cpus) { >> - error_report("cpu topology: " >> - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " >> - "smp_cpus (%u)", >> - sockets, dies, cores, threads, cpus); >> - exit(1); >> - } >> - >> - ms->smp.max_cpus = >> - qemu_opt_get_number(opts, "maxcpus", cpus); >> - >> - if (ms->smp.max_cpus < cpus) { >> - error_report("maxcpus must be equal to or greater than smp"); >> - exit(1); >> - } >> - >> - if (sockets * dies * cores * threads > ms->smp.max_cpus) { >> - error_report("cpu topology: " >> - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " >> - "maxcpus (%u)", >> - sockets, dies, cores, threads, >> - ms->smp.max_cpus); >> - exit(1); >> - } >> - >> - if (sockets * dies * cores * threads != ms->smp.max_cpus) { >> - warn_report("Invalid CPU topology deprecated: " >> - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " >> - "!= maxcpus (%u)", >> - sockets, dies, cores, threads, >> - ms->smp.max_cpus); >> - } >> - >> - ms->smp.cpus = cpus; >> - ms->smp.cores = cores; >> - ms->smp.threads = threads; >> - pcms->smp_dies = dies; >> - } >> - >> - if (ms->smp.cpus > 1) { >> - Error *blocker = NULL; >> - error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); >> - replay_add_blocker(blocker); >> - } >> -} >> - >> -void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) >> -{ >> - PCMachineState *pcms = PC_MACHINE(ms); >> - int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id); >> - Error *local_err = NULL; >> - >> - if (id < 0) { >> - error_setg(errp, "Invalid CPU id: %" PRIi64, id); >> - return; >> - } >> - >> - if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { >> - error_setg(errp, "Unable to add CPU: %" PRIi64 >> - ", resulting APIC ID (%" PRIi64 ") is too large", >> - id, apic_id); >> - return; >> - } >> - >> - pc_new_cpu(PC_MACHINE(ms), apic_id, &local_err); >> - if (local_err) { >> - error_propagate(errp, local_err); >> - return; >> - } >> -} >> - >> -void 
pc_cpus_init(PCMachineState *pcms) >> -{ >> - int i; >> - const CPUArchIdList *possible_cpus; >> - MachineState *ms = MACHINE(pcms); >> - MachineClass *mc = MACHINE_GET_CLASS(pcms); >> - PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); >> - >> - x86_cpu_set_default_version(pcmc->default_cpu_version); >> - >> - /* Calculates the limit to CPU APIC ID values >> - * >> - * Limit for the APIC ID value, so that all >> - * CPU APIC IDs are < pcms->apic_id_limit. >> - * >> - * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). >> - */ >> - pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, >> - ms->smp.max_cpus - 1) + 1; >> - possible_cpus = mc->possible_cpu_arch_ids(ms); >> - for (i = 0; i < ms->smp.cpus; i++) { >> - pc_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal); >> - } >> -} >> - >> static void pc_build_feature_control_file(PCMachineState *pcms) >> { >> MachineState *ms = MACHINE(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); >> CPUX86State *env = &cpu->env; >> uint32_t unused, ecx, edx; >> @@ -1550,7 +1029,7 @@ static void pc_build_feature_control_file(PCMachineState *pcms) >> >> val = g_malloc(sizeof(*val)); >> *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED); >> - fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); >> + fw_cfg_add_file(x86ms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); >> } >> >> static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) >> @@ -1571,10 +1050,11 @@ void pc_machine_done(Notifier *notifier, void *data) >> { >> PCMachineState *pcms = container_of(notifier, >> PCMachineState, machine_done); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> PCIBus *bus = pcms->bus; >> >> /* set the number of CPUs */ >> - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); >> + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); >> >> if (bus) { >> int extra_hosts = 0; >> @@ -1585,23 +1065,23 @@ void 
pc_machine_done(Notifier *notifier, void *data) >> extra_hosts++; >> } >> } >> - if (extra_hosts && pcms->fw_cfg) { >> + if (extra_hosts && x86ms->fw_cfg) { >> uint64_t *val = g_malloc(sizeof(*val)); >> *val = cpu_to_le64(extra_hosts); >> - fw_cfg_add_file(pcms->fw_cfg, >> + fw_cfg_add_file(x86ms->fw_cfg, >> "etc/extra-pci-roots", val, sizeof(*val)); >> } >> } >> >> acpi_setup(); >> - if (pcms->fw_cfg) { >> + if (x86ms->fw_cfg) { >> pc_build_smbios(pcms); >> pc_build_feature_control_file(pcms); >> /* update FW_CFG_NB_CPUS to account for -device added CPUs */ >> - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); >> + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); >> } >> >> - if (pcms->apic_id_limit > 255 && !xen_enabled()) { >> + if (x86ms->apic_id_limit > 255 && !xen_enabled()) { >> IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); >> >> if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) || >> @@ -1619,8 +1099,9 @@ void pc_guest_info_init(PCMachineState *pcms) >> { >> int i; >> MachineState *ms = MACHINE(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> >> - pcms->apic_xrupt_override = kvm_allows_irq0_override(); >> + x86ms->apic_xrupt_override = kvm_allows_irq0_override(); >> pcms->numa_nodes = ms->numa_state->num_nodes; >> pcms->node_mem = g_malloc0(pcms->numa_nodes * >> sizeof *pcms->node_mem); >> @@ -1645,14 +1126,17 @@ void xen_load_linux(PCMachineState *pcms) >> { >> int i; >> FWCfgState *fw_cfg; >> + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> >> assert(MACHINE(pcms)->kernel_filename != NULL); >> >> fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE); >> - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); >> + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); >> rom_set_fw(fw_cfg); >> >> - load_linux(pcms, fw_cfg); >> + load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, >> + pcmc->linuxboot_dma_enabled, 
pcmc->pvh_enabled); >> for (i = 0; i < nb_option_roms; i++) { >> assert(!strcmp(option_rom[i].name, "linuxboot.bin") || >> !strcmp(option_rom[i].name, "linuxboot_dma.bin") || >> @@ -1660,7 +1144,7 @@ void xen_load_linux(PCMachineState *pcms) >> !strcmp(option_rom[i].name, "multiboot.bin")); >> rom_add_option(option_rom[i].name, option_rom[i].bootindex); >> } >> - pcms->fw_cfg = fw_cfg; >> + x86ms->fw_cfg = fw_cfg; >> } >> >> void pc_memory_init(PCMachineState *pcms, >> @@ -1673,10 +1157,11 @@ void pc_memory_init(PCMachineState *pcms, >> MemoryRegion *ram_below_4g, *ram_above_4g; >> FWCfgState *fw_cfg; >> MachineState *machine = MACHINE(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> >> - assert(machine->ram_size == pcms->below_4g_mem_size + >> - pcms->above_4g_mem_size); >> + assert(machine->ram_size == x86ms->below_4g_mem_size + >> + x86ms->above_4g_mem_size); >> >> linux_boot = (machine->kernel_filename != NULL); >> >> @@ -1690,17 +1175,17 @@ void pc_memory_init(PCMachineState *pcms, >> *ram_memory = ram; >> ram_below_4g = g_malloc(sizeof(*ram_below_4g)); >> memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, >> - 0, pcms->below_4g_mem_size); >> + 0, x86ms->below_4g_mem_size); >> memory_region_add_subregion(system_memory, 0, ram_below_4g); >> - e820_add_entry(0, pcms->below_4g_mem_size, E820_RAM); >> - if (pcms->above_4g_mem_size > 0) { >> + e820_add_entry(0, x86ms->below_4g_mem_size, E820_RAM); >> + if (x86ms->above_4g_mem_size > 0) { >> ram_above_4g = g_malloc(sizeof(*ram_above_4g)); >> memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram, >> - pcms->below_4g_mem_size, >> - pcms->above_4g_mem_size); >> + x86ms->below_4g_mem_size, >> + x86ms->above_4g_mem_size); >> memory_region_add_subregion(system_memory, 0x100000000ULL, >> ram_above_4g); >> - e820_add_entry(0x100000000ULL, pcms->above_4g_mem_size, E820_RAM); >> + e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, 
E820_RAM); >> } >> >> if (!pcmc->has_reserved_memory && >> @@ -1735,7 +1220,7 @@ void pc_memory_init(PCMachineState *pcms, >> } >> >> machine->device_memory->base = >> - ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1 * GiB); >> + ROUND_UP(0x100000000ULL + x86ms->above_4g_mem_size, 1 * GiB); >> >> if (pcmc->enforce_aligned_dimm) { >> /* size device region assuming 1G page max alignment per slot */ >> @@ -1786,16 +1271,17 @@ void pc_memory_init(PCMachineState *pcms, >> } >> >> if (linux_boot) { >> - load_linux(pcms, fw_cfg); >> + load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, >> + pcmc->linuxboot_dma_enabled, pcmc->pvh_enabled); >> } >> >> for (i = 0; i < nb_option_roms; i++) { >> rom_add_option(option_rom[i].name, option_rom[i].bootindex); >> } >> - pcms->fw_cfg = fw_cfg; >> + x86ms->fw_cfg = fw_cfg; >> >> /* Init default IOAPIC address space */ >> - pcms->ioapic_as = &address_space_memory; >> + x86ms->ioapic_as = &address_space_memory; >> } >> >> /* >> @@ -1807,6 +1293,7 @@ uint64_t pc_pci_hole64_start(void) >> PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> MachineState *ms = MACHINE(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> uint64_t hole64_start = 0; >> >> if (pcmc->has_reserved_memory && ms->device_memory->base) { >> @@ -1815,7 +1302,7 @@ uint64_t pc_pci_hole64_start(void) >> hole64_start += memory_region_size(&ms->device_memory->mr); >> } >> } else { >> - hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; >> + hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size; >> } >> >> return ROUND_UP(hole64_start, 1 * GiB); >> @@ -2154,6 +1641,7 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, >> Error *local_err = NULL; >> X86CPU *cpu = X86_CPU(dev); >> PCMachineState *pcms = PC_MACHINE(hotplug_dev); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> >> if (pcms->acpi_dev) { >> hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); >> @@ -2163,12 
+1651,12 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, >> } >> >> /* increment the number of CPUs */ >> - pcms->boot_cpus++; >> - if (pcms->rtc) { >> - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); >> + x86ms->boot_cpus++; >> + if (x86ms->rtc) { >> + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); >> } >> - if (pcms->fw_cfg) { >> - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); >> + if (x86ms->fw_cfg) { >> + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); >> } >> >> found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL); >> @@ -2214,6 +1702,7 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, >> Error *local_err = NULL; >> X86CPU *cpu = X86_CPU(dev); >> PCMachineState *pcms = PC_MACHINE(hotplug_dev); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> >> hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); >> if (local_err) { >> @@ -2225,10 +1714,10 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, >> object_property_set_bool(OBJECT(dev), false, "realized", NULL); >> >> /* decrement the number of CPUs */ >> - pcms->boot_cpus--; >> + x86ms->boot_cpus--; >> /* Update the number of CPUs in CMOS */ >> - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); >> - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); >> + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); >> + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); >> out: >> error_propagate(errp, local_err); >> } >> @@ -2244,6 +1733,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, >> CPUX86State *env = &cpu->env; >> MachineState *ms = MACHINE(hotplug_dev); >> PCMachineState *pcms = PC_MACHINE(hotplug_dev); >> + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); >> unsigned int smp_cores = ms->smp.cores; >> unsigned int smp_threads = ms->smp.threads; >> >> @@ -2253,7 +1743,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, >> return; >> } >> >> - env->nr_dies = 
pcms->smp_dies; >> + env->nr_dies = x86ms->smp_dies; >> >> /* >> * If APIC ID is not set, >> @@ -2261,13 +1751,13 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, >> */ >> if (cpu->apic_id == UNASSIGNED_APIC_ID) { >> int max_socket = (ms->smp.max_cpus - 1) / >> - smp_threads / smp_cores / pcms->smp_dies; >> + smp_threads / smp_cores / x86ms->smp_dies; >> >> /* >> * die-id was optional in QEMU 4.0 and older, so keep it optional >> * if there's only one die per socket. >> */ >> - if (cpu->die_id < 0 && pcms->smp_dies == 1) { >> + if (cpu->die_id < 0 && x86ms->smp_dies == 1) { >> cpu->die_id = 0; >> } >> >> @@ -2282,9 +1772,9 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, >> if (cpu->die_id < 0) { >> error_setg(errp, "CPU die-id is not set"); >> return; >> - } else if (cpu->die_id > pcms->smp_dies - 1) { >> + } else if (cpu->die_id > x86ms->smp_dies - 1) { >> error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", >> - cpu->die_id, pcms->smp_dies - 1); >> + cpu->die_id, x86ms->smp_dies - 1); >> return; >> } >> if (cpu->core_id < 0) { >> @@ -2308,7 +1798,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, >> topo.die_id = cpu->die_id; >> topo.core_id = cpu->core_id; >> topo.smt_id = cpu->thread_id; >> - cpu->apic_id = apicid_from_topo_ids(pcms->smp_dies, smp_cores, >> + cpu->apic_id = apicid_from_topo_ids(x86ms->smp_dies, smp_cores, >> smp_threads, &topo); >> } >> >> @@ -2316,7 +1806,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, >> if (!cpu_slot) { >> MachineState *ms = MACHINE(pcms); >> >> - x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, >> + x86_topo_ids_from_apicid(cpu->apic_id, x86ms->smp_dies, >> smp_cores, smp_threads, &topo); >> error_setg(errp, >> "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" >> @@ -2338,7 +1828,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, >> /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() >> * once -smp refactoring is 
complete and there will be CPU private >> * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ >> - x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, >> + x86_topo_ids_from_apicid(cpu->apic_id, x86ms->smp_dies, >> smp_cores, smp_threads, &topo); >> if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { >> error_setg(errp, "property socket-id: %u doesn't match set apic-id:" >> @@ -2520,45 +2010,6 @@ pc_machine_get_device_memory_region_size(Object *obj, Visitor *v, >> visit_type_int(v, name, &value, errp); >> } >> >> -static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, >> - const char *name, void *opaque, >> - Error **errp) >> -{ >> - PCMachineState *pcms = PC_MACHINE(obj); >> - uint64_t value = pcms->max_ram_below_4g; >> - >> - visit_type_size(v, name, &value, errp); >> -} >> - >> -static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, >> - const char *name, void *opaque, >> - Error **errp) >> -{ >> - PCMachineState *pcms = PC_MACHINE(obj); >> - Error *error = NULL; >> - uint64_t value; >> - >> - visit_type_size(v, name, &value, &error); >> - if (error) { >> - error_propagate(errp, error); >> - return; >> - } >> - if (value > 4 * GiB) { >> - error_setg(&error, >> - "Machine option 'max-ram-below-4g=%"PRIu64 >> - "' expects size less than or equal to 4G", value); >> - error_propagate(errp, error); >> - return; >> - } >> - >> - if (value < 1 * MiB) { >> - warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," >> - "BIOS may not work with less than 1MiB", value); >> - } >> - >> - pcms->max_ram_below_4g = value; >> -} >> - >> static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name, >> void *opaque, Error **errp) >> { >> @@ -2664,7 +2115,6 @@ static void pc_machine_initfn(Object *obj) >> { >> PCMachineState *pcms = PC_MACHINE(obj); >> >> - pcms->max_ram_below_4g = 0; /* use default */ >> pcms->smm = ON_OFF_AUTO_AUTO; >> #ifdef CONFIG_VMPORT >> pcms->vmport = 
ON_OFF_AUTO_AUTO; >> @@ -2676,7 +2126,6 @@ static void pc_machine_initfn(Object *obj) >> pcms->smbus_enabled = true; >> pcms->sata_enabled = true; >> pcms->pit_enabled = true; >> - pcms->smp_dies = 1; >> >> pc_system_flash_create(pcms); >> } >> @@ -2707,85 +2156,6 @@ static void pc_machine_wakeup(MachineState *machine) >> cpu_synchronize_all_post_reset(); >> } >> >> -static CpuInstanceProperties >> -pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index) >> -{ >> - MachineClass *mc = MACHINE_GET_CLASS(ms); >> - const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); >> - >> - assert(cpu_index < possible_cpus->len); >> - return possible_cpus->cpus[cpu_index].props; >> -} >> - >> -static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx) >> -{ >> - X86CPUTopoInfo topo; >> - PCMachineState *pcms = PC_MACHINE(ms); >> - >> - assert(idx < ms->possible_cpus->len); >> - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, >> - pcms->smp_dies, ms->smp.cores, >> - ms->smp.threads, &topo); >> - return topo.pkg_id % ms->numa_state->num_nodes; >> -} >> - >> -static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) >> -{ >> - PCMachineState *pcms = PC_MACHINE(ms); >> - int i; >> - unsigned int max_cpus = ms->smp.max_cpus; >> - >> - if (ms->possible_cpus) { >> - /* >> - * make sure that max_cpus hasn't changed since the first use, i.e. 
>> - * -smp hasn't been parsed after it >> - */ >> - assert(ms->possible_cpus->len == max_cpus); >> - return ms->possible_cpus; >> - } >> - >> - ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + >> - sizeof(CPUArchId) * max_cpus); >> - ms->possible_cpus->len = max_cpus; >> - for (i = 0; i < ms->possible_cpus->len; i++) { >> - X86CPUTopoInfo topo; >> - >> - ms->possible_cpus->cpus[i].type = ms->cpu_type; >> - ms->possible_cpus->cpus[i].vcpus_count = 1; >> - ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); >> - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, >> - pcms->smp_dies, ms->smp.cores, >> - ms->smp.threads, &topo); >> - ms->possible_cpus->cpus[i].props.has_socket_id = true; >> - ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; >> - if (pcms->smp_dies > 1) { >> - ms->possible_cpus->cpus[i].props.has_die_id = true; >> - ms->possible_cpus->cpus[i].props.die_id = topo.die_id; >> - } >> - ms->possible_cpus->cpus[i].props.has_core_id = true; >> - ms->possible_cpus->cpus[i].props.core_id = topo.core_id; >> - ms->possible_cpus->cpus[i].props.has_thread_id = true; >> - ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; >> - } >> - return ms->possible_cpus; >> -} >> - >> -static void x86_nmi(NMIState *n, int cpu_index, Error **errp) >> -{ >> - /* cpu index isn't used */ >> - CPUState *cs; >> - >> - CPU_FOREACH(cs) { >> - X86CPU *cpu = X86_CPU(cs); >> - >> - if (!cpu->apic_state) { >> - cpu_interrupt(cs, CPU_INTERRUPT_NMI); >> - } else { >> - apic_deliver_nmi(cpu->apic_state); >> - } >> - } >> -} >> - >> static void pc_machine_class_init(ObjectClass *oc, void *data) >> { >> MachineClass *mc = MACHINE_CLASS(oc); >> @@ -2810,14 +2180,11 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) >> pcmc->pvh_enabled = true; >> assert(!mc->get_hotplug_handler); >> mc->get_hotplug_handler = pc_get_hotplug_handler; >> - mc->cpu_index_to_instance_props = pc_cpu_index_to_props; >> - mc->get_default_cpu_node_id = 
pc_get_default_cpu_node_id; >> - mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; >> mc->auto_enable_numa_with_memhp = true; >> mc->has_hotpluggable_cpus = true; >> mc->default_boot_order = "cad"; >> - mc->hot_add_cpu = pc_hot_add_cpu; >> - mc->smp_parse = pc_smp_parse; >> + mc->hot_add_cpu = x86_hot_add_cpu; >> + mc->smp_parse = x86_smp_parse; >> mc->block_default_type = IF_IDE; >> mc->max_cpus = 255; >> mc->reset = pc_machine_reset; >> @@ -2835,13 +2202,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) >> pc_machine_get_device_memory_region_size, NULL, >> NULL, NULL, &error_abort); >> >> - object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", >> - pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, >> - NULL, NULL, &error_abort); >> - >> - object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G, >> - "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); >> - >> object_class_property_add(oc, PC_MACHINE_SMM, "OnOffAuto", >> pc_machine_get_smm, pc_machine_set_smm, >> NULL, NULL, &error_abort); >> @@ -2866,7 +2226,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) >> >> static const TypeInfo pc_machine_info = { >> .name = TYPE_PC_MACHINE, >> - .parent = TYPE_MACHINE, >> + .parent = TYPE_X86_MACHINE, >> .abstract = true, >> .instance_size = sizeof(PCMachineState), >> .instance_init = pc_machine_initfn, >> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c >> index 2362675149..f63c27bc74 100644 >> --- a/hw/i386/pc_piix.c >> +++ b/hw/i386/pc_piix.c >> @@ -27,6 +27,7 @@ >> >> #include "qemu/units.h" >> #include "hw/loader.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/pc.h" >> #include "hw/i386/apic.h" >> #include "hw/display/ramfb.h" >> @@ -73,6 +74,7 @@ static void pc_init1(MachineState *machine, >> { >> PCMachineState *pcms = PC_MACHINE(machine); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> MemoryRegion 
*system_memory = get_system_memory(); >> MemoryRegion *system_io = get_system_io(); >> int i; >> @@ -125,11 +127,11 @@ static void pc_init1(MachineState *machine, >> if (xen_enabled()) { >> xen_hvm_init(pcms, &ram_memory); >> } else { >> - if (!pcms->max_ram_below_4g) { >> - pcms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ >> + if (!x86ms->max_ram_below_4g) { >> + x86ms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ >> } >> - lowmem = pcms->max_ram_below_4g; >> - if (machine->ram_size >= pcms->max_ram_below_4g) { >> + lowmem = x86ms->max_ram_below_4g; >> + if (machine->ram_size >= x86ms->max_ram_below_4g) { >> if (pcmc->gigabyte_align) { >> if (lowmem > 0xc0000000) { >> lowmem = 0xc0000000; >> @@ -138,21 +140,21 @@ static void pc_init1(MachineState *machine, >> warn_report("Large machine and max_ram_below_4g " >> "(%" PRIu64 ") not a multiple of 1G; " >> "possible bad performance.", >> - pcms->max_ram_below_4g); >> + x86ms->max_ram_below_4g); >> } >> } >> } >> >> if (machine->ram_size >= lowmem) { >> - pcms->above_4g_mem_size = machine->ram_size - lowmem; >> - pcms->below_4g_mem_size = lowmem; >> + x86ms->above_4g_mem_size = machine->ram_size - lowmem; >> + x86ms->below_4g_mem_size = lowmem; >> } else { >> - pcms->above_4g_mem_size = 0; >> - pcms->below_4g_mem_size = machine->ram_size; >> + x86ms->above_4g_mem_size = 0; >> + x86ms->below_4g_mem_size = machine->ram_size; >> } >> } >> >> - pc_cpus_init(pcms); >> + x86_cpus_init(x86ms, pcmc->default_cpu_version); >> >> if (kvm_enabled() && pcmc->kvmclock_enabled) { >> kvmclock_create(); >> @@ -190,19 +192,19 @@ static void pc_init1(MachineState *machine, >> gsi_state = g_malloc0(sizeof(*gsi_state)); >> if (kvm_ioapic_in_kernel()) { >> kvm_pc_setup_irq_routing(pcmc->pci_enabled); >> - pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, >> - GSI_NUM_PINS); >> + x86ms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, >> + GSI_NUM_PINS); >> } else { >> - pcms->gsi = 
qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); >> + x86ms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); >> } >> >> if (pcmc->pci_enabled) { >> pci_bus = i440fx_init(host_type, >> pci_type, >> - &i440fx_state, &piix3_devfn, &isa_bus, pcms->gsi, >> + &i440fx_state, &piix3_devfn, &isa_bus, x86ms->gsi, >> system_memory, system_io, machine->ram_size, >> - pcms->below_4g_mem_size, >> - pcms->above_4g_mem_size, >> + x86ms->below_4g_mem_size, >> + x86ms->above_4g_mem_size, >> pci_memory, ram_memory); >> pcms->bus = pci_bus; >> } else { >> @@ -212,7 +214,7 @@ static void pc_init1(MachineState *machine, >> &error_abort); >> no_hpet = 1; >> } >> - isa_bus_irqs(isa_bus, pcms->gsi); >> + isa_bus_irqs(isa_bus, x86ms->gsi); >> >> if (kvm_pic_in_kernel()) { >> i8259 = kvm_i8259_init(isa_bus); >> @@ -230,7 +232,7 @@ static void pc_init1(MachineState *machine, >> ioapic_init_gsi(gsi_state, "i440fx"); >> } >> >> - pc_register_ferr_irq(pcms->gsi[13]); >> + pc_register_ferr_irq(x86ms->gsi[13]); >> >> pc_vga_init(isa_bus, pcmc->pci_enabled ? pci_bus : NULL); >> >> @@ -240,7 +242,7 @@ static void pc_init1(MachineState *machine, >> } >> >> /* init basic PC hardware */ >> - pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true, >> + pc_basic_device_init(isa_bus, x86ms->gsi, &rtc_state, true, >> (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit_enabled, >> 0x4); >> >> @@ -288,7 +290,7 @@ else { >> smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0); >> /* TODO: Populate SPD eeprom data. 
*/ >> smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, >> - pcms->gsi[9], smi_irq, >> + x86ms->gsi[9], smi_irq, >> pc_machine_is_smm_enabled(pcms), >> &piix4_pm); >> smbus_eeprom_init(smbus, 8, NULL, 0); >> @@ -304,7 +306,7 @@ else { >> >> if (machine->nvdimms_state->is_enabled) { >> nvdimm_init_acpi_state(machine->nvdimms_state, system_io, >> - pcms->fw_cfg, OBJECT(pcms)); >> + x86ms->fw_cfg, OBJECT(pcms)); >> } >> } >> >> @@ -728,7 +730,7 @@ DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4_fn, >> >> static void pc_i440fx_1_3_machine_options(MachineClass *m) >> { >> - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); >> + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); >> static GlobalProperty compat[] = { >> PC_CPU_MODEL_IDS("1.3.0") >> { "usb-tablet", "usb_version", "1" }, >> @@ -739,7 +741,7 @@ static void pc_i440fx_1_3_machine_options(MachineClass *m) >> >> pc_i440fx_1_4_machine_options(m); >> m->hw_version = "1.3.0"; >> - pcmc->compat_apic_id_mode = true; >> + x86mc->compat_apic_id_mode = true; >> compat_props_add(m->compat_props, compat, G_N_ELEMENTS(compat)); >> } >> >> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c >> index d4e8a1cb9f..71f71bc61d 100644 >> --- a/hw/i386/pc_q35.c >> +++ b/hw/i386/pc_q35.c >> @@ -41,6 +41,7 @@ >> #include "hw/pci-host/q35.h" >> #include "hw/qdev-properties.h" >> #include "exec/address-spaces.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/pc.h" >> #include "hw/i386/ich9.h" >> #include "hw/i386/amd_iommu.h" >> @@ -115,6 +116,7 @@ static void pc_q35_init(MachineState *machine) >> { >> PCMachineState *pcms = PC_MACHINE(machine); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + X86MachineState *x86ms = X86_MACHINE(pcms); >> Q35PCIHost *q35_host; >> PCIHostState *phb; >> PCIBus *host_bus; >> @@ -152,34 +154,34 @@ static void pc_q35_init(MachineState *machine) >> /* Handle the machine opt max-ram-below-4g. It is basically doing >> * min(qemu limit, user limit). 
>> */ >> - if (!pcms->max_ram_below_4g) { >> - pcms->max_ram_below_4g = 1ULL << 32; /* default: 4G */; >> + if (!x86ms->max_ram_below_4g) { >> + x86ms->max_ram_below_4g = 1ULL << 32; /* default: 4G */; >> } >> - if (lowmem > pcms->max_ram_below_4g) { >> - lowmem = pcms->max_ram_below_4g; >> + if (lowmem > x86ms->max_ram_below_4g) { >> + lowmem = x86ms->max_ram_below_4g; >> if (machine->ram_size - lowmem > lowmem && >> lowmem & (1 * GiB - 1)) { >> warn_report("There is possibly poor performance as the ram size " >> " (0x%" PRIx64 ") is more then twice the size of" >> " max-ram-below-4g (%"PRIu64") and" >> " max-ram-below-4g is not a multiple of 1G.", >> - (uint64_t)machine->ram_size, pcms->max_ram_below_4g); >> + (uint64_t)machine->ram_size, x86ms->max_ram_below_4g); >> } >> } >> >> if (machine->ram_size >= lowmem) { >> - pcms->above_4g_mem_size = machine->ram_size - lowmem; >> - pcms->below_4g_mem_size = lowmem; >> + x86ms->above_4g_mem_size = machine->ram_size - lowmem; >> + x86ms->below_4g_mem_size = lowmem; >> } else { >> - pcms->above_4g_mem_size = 0; >> - pcms->below_4g_mem_size = machine->ram_size; >> + x86ms->above_4g_mem_size = 0; >> + x86ms->below_4g_mem_size = machine->ram_size; >> } >> >> if (xen_enabled()) { >> xen_hvm_init(pcms, &ram_memory); >> } >> >> - pc_cpus_init(pcms); >> + x86_cpus_init(x86ms, pcmc->default_cpu_version); >> >> kvmclock_create(); >> >> @@ -213,10 +215,10 @@ static void pc_q35_init(MachineState *machine) >> gsi_state = g_malloc0(sizeof(*gsi_state)); >> if (kvm_ioapic_in_kernel()) { >> kvm_pc_setup_irq_routing(pcmc->pci_enabled); >> - pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, >> + x86ms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, >> GSI_NUM_PINS); >> } else { >> - pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); >> + x86ms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); >> } >> >> /* create pci host bus */ >> @@ -231,9 +233,9 @@ static void 
pc_q35_init(MachineState *machine) >> MCH_HOST_PROP_SYSTEM_MEM, NULL); >> object_property_set_link(OBJECT(q35_host), OBJECT(system_io), >> MCH_HOST_PROP_IO_MEM, NULL); >> - object_property_set_int(OBJECT(q35_host), pcms->below_4g_mem_size, >> + object_property_set_int(OBJECT(q35_host), x86ms->below_4g_mem_size, >> PCI_HOST_BELOW_4G_MEM_SIZE, NULL); >> - object_property_set_int(OBJECT(q35_host), pcms->above_4g_mem_size, >> + object_property_set_int(OBJECT(q35_host), x86ms->above_4g_mem_size, >> PCI_HOST_ABOVE_4G_MEM_SIZE, NULL); >> /* pci */ >> qdev_init_nofail(DEVICE(q35_host)); >> @@ -255,7 +257,7 @@ static void pc_q35_init(MachineState *machine) >> ich9_lpc = ICH9_LPC_DEVICE(lpc); >> lpc_dev = DEVICE(lpc); >> for (i = 0; i < GSI_NUM_PINS; i++) { >> - qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, pcms->gsi[i]); >> + qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]); >> } >> pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc, >> ICH9_LPC_NB_PIRQS); >> @@ -279,7 +281,7 @@ static void pc_q35_init(MachineState *machine) >> ioapic_init_gsi(gsi_state, "q35"); >> } >> >> - pc_register_ferr_irq(pcms->gsi[13]); >> + pc_register_ferr_irq(x86ms->gsi[13]); >> >> assert(pcms->vmport != ON_OFF_AUTO__MAX); >> if (pcms->vmport == ON_OFF_AUTO_AUTO) { >> @@ -287,7 +289,7 @@ static void pc_q35_init(MachineState *machine) >> } >> >> /* init basic PC hardware */ >> - pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy, >> + pc_basic_device_init(isa_bus, x86ms->gsi, &rtc_state, !mc->no_floppy, >> (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit_enabled, >> 0xff0104); >> >> @@ -330,7 +332,7 @@ static void pc_q35_init(MachineState *machine) >> >> if (machine->nvdimms_state->is_enabled) { >> nvdimm_init_acpi_state(machine->nvdimms_state, system_io, >> - pcms->fw_cfg, OBJECT(pcms)); >> + x86ms->fw_cfg, OBJECT(pcms)); >> } >> } >> >> diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c >> index a9983f0bfb..97f38e0423 100644 >> --- 
a/hw/i386/pc_sysfw.c >> +++ b/hw/i386/pc_sysfw.c >> @@ -31,6 +31,7 @@ >> #include "qemu/option.h" >> #include "qemu/units.h" >> #include "hw/sysbus.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/pc.h" >> #include "hw/loader.h" >> #include "hw/qdev-properties.h" >> @@ -38,8 +39,6 @@ >> #include "hw/block/flash.h" >> #include "sysemu/kvm.h" >> >> -#define BIOS_FILENAME "bios.bin" >> - >> /* >> * We don't have a theoretically justifiable exact lower bound on the base >> * address of any flash mapping. In practice, the IO-APIC MMIO range is >> @@ -211,59 +210,6 @@ static void pc_system_flash_map(PCMachineState *pcms, >> } >> } >> >> -static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) >> -{ >> - char *filename; >> - MemoryRegion *bios, *isa_bios; >> - int bios_size, isa_bios_size; >> - int ret; >> - >> - /* BIOS load */ >> - if (bios_name == NULL) { >> - bios_name = BIOS_FILENAME; >> - } >> - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); >> - if (filename) { >> - bios_size = get_image_size(filename); >> - } else { >> - bios_size = -1; >> - } >> - if (bios_size <= 0 || >> - (bios_size % 65536) != 0) { >> - goto bios_error; >> - } >> - bios = g_malloc(sizeof(*bios)); >> - memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); >> - if (!isapc_ram_fw) { >> - memory_region_set_readonly(bios, true); >> - } >> - ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); >> - if (ret != 0) { >> - bios_error: >> - fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); >> - exit(1); >> - } >> - g_free(filename); >> - >> - /* map the last 128KB of the BIOS in ISA space */ >> - isa_bios_size = MIN(bios_size, 128 * KiB); >> - isa_bios = g_malloc(sizeof(*isa_bios)); >> - memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, >> - bios_size - isa_bios_size, isa_bios_size); >> - memory_region_add_subregion_overlap(rom_memory, >> - 0x100000 - isa_bios_size, >> - isa_bios, >> - 1); >> - if 
(!isapc_ram_fw) { >> - memory_region_set_readonly(isa_bios, true); >> - } >> - >> - /* map all the bios at the top of memory */ >> - memory_region_add_subregion(rom_memory, >> - (uint32_t)(-bios_size), >> - bios); >> -} >> - >> void pc_system_firmware_init(PCMachineState *pcms, >> MemoryRegion *rom_memory) >> { >> @@ -272,7 +218,7 @@ void pc_system_firmware_init(PCMachineState *pcms, >> BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; >> >> if (!pcmc->pci_enabled) { >> - old_pc_system_rom_init(rom_memory, true); >> + x86_system_rom_init(rom_memory, true); >> return; >> } >> >> @@ -293,7 +239,7 @@ void pc_system_firmware_init(PCMachineState *pcms, >> >> if (!pflash_blk[0]) { >> /* Machine property pflash0 not set, use ROM mode */ >> - old_pc_system_rom_init(rom_memory, false); >> + x86_system_rom_init(rom_memory, false); >> } else { >> if (kvm_enabled() && !kvm_readonly_mem_enabled()) { >> /* >> diff --git a/hw/i386/x86.c b/hw/i386/x86.c >> new file mode 100644 >> index 0000000000..4de9dd100f >> --- /dev/null >> +++ b/hw/i386/x86.c >> @@ -0,0 +1,788 @@ >> +/* >> + * Copyright (c) 2003-2004 Fabrice Bellard >> + * Copyright (c) 2019 Red Hat, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a copy >> + * of this software and associated documentation files (the "Software"), to deal >> + * in the Software without restriction, including without limitation the rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell >> + * copies of the Software, and to permit persons to whom the Software is >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. 
>> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> + * THE SOFTWARE. >> + */ >> +#include "qemu/osdep.h" >> +#include "qemu/error-report.h" >> +#include "qemu/option.h" >> +#include "qemu/cutils.h" >> +#include "qemu/units.h" >> +#include "qapi/error.h" >> +#include "qapi/qmp/qerror.h" >> +#include "qapi/qapi-visit-common.h" >> +#include "qapi/visitor.h" >> +#include "sysemu/qtest.h" >> +#include "sysemu/numa.h" >> +#include "sysemu/replay.h" >> +#include "sysemu/sysemu.h" >> + >> +#include "hw/i386/x86.h" >> +#include "target/i386/cpu.h" >> +#include "hw/i386/topology.h" >> +#include "hw/i386/fw_cfg.h" >> +#include "hw/acpi/cpu_hotplug.h" >> +#include "hw/nmi.h" >> +#include "hw/loader.h" >> +#include "multiboot.h" >> +#include "pvh.h" >> +#include "standard-headers/asm-x86/bootparam.h" >> + >> +#define BIOS_FILENAME "bios.bin" >> + >> +/* Calculates initial APIC ID for a specific CPU index >> + * >> + * Currently we need to be able to calculate the APIC ID from the CPU index >> + * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have >> + * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of >> + * all CPUs up to max_cpus. 
>> + */ >> +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, >> + unsigned int cpu_index) >> +{ >> + MachineState *ms = MACHINE(x86ms); >> + X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms); >> + uint32_t correct_id; >> + static bool warned; >> + >> + correct_id = x86_apicid_from_cpu_idx(x86ms->smp_dies, ms->smp.cores, >> + ms->smp.threads, cpu_index); >> + if (x86mc->compat_apic_id_mode) { >> + if (cpu_index != correct_id && !warned && !qtest_enabled()) { >> + error_report("APIC IDs set in compatibility mode, " >> + "CPU topology won't match the configuration"); >> + warned = true; >> + } >> + return cpu_index; >> + } else { >> + return correct_id; >> + } >> +} >> + >> + >> +static void x86_new_cpu(X86MachineState *x86ms, int64_t apic_id, Error **errp) >> +{ >> + Object *cpu = NULL; >> + Error *local_err = NULL; >> + CPUX86State *env = NULL; >> + >> + cpu = object_new(MACHINE(x86ms)->cpu_type); >> + >> + env = &X86_CPU(cpu)->env; >> + env->nr_dies = x86ms->smp_dies; >> + >> + object_property_set_uint(cpu, apic_id, "apic-id", &local_err); >> + object_property_set_bool(cpu, true, "realized", &local_err); >> + >> + object_unref(cpu); >> + error_propagate(errp, local_err); >> +} >> + >> +/* >> + * This function is very similar to smp_parse() >> + * in hw/core/machine.c but includes CPU die support. >> + */ >> +void x86_smp_parse(MachineState *ms, QemuOpts *opts) >> +{ >> + X86MachineState *x86ms = X86_MACHINE(ms); >> + >> + if (opts) { >> + unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); >> + unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); >> + unsigned dies = qemu_opt_get_number(opts, "dies", 1); >> + unsigned cores = qemu_opt_get_number(opts, "cores", 0); >> + unsigned threads = qemu_opt_get_number(opts, "threads", 0); >> + >> + /* compute missing values, prefer sockets over cores over threads */ >> + if (cpus == 0 || sockets == 0) { >> + cores = cores > 0 ? cores : 1; >> + threads = threads > 0 ? 
threads : 1; >> + if (cpus == 0) { >> + sockets = sockets > 0 ? sockets : 1; >> + cpus = cores * threads * dies * sockets; >> + } else { >> + ms->smp.max_cpus = >> + qemu_opt_get_number(opts, "maxcpus", cpus); >> + sockets = ms->smp.max_cpus / (cores * threads * dies); >> + } >> + } else if (cores == 0) { >> + threads = threads > 0 ? threads : 1; >> + cores = cpus / (sockets * dies * threads); >> + cores = cores > 0 ? cores : 1; >> + } else if (threads == 0) { >> + threads = cpus / (cores * dies * sockets); >> + threads = threads > 0 ? threads : 1; >> + } else if (sockets * dies * cores * threads < cpus) { >> + error_report("cpu topology: " >> + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " >> + "smp_cpus (%u)", >> + sockets, dies, cores, threads, cpus); >> + exit(1); >> + } >> + >> + ms->smp.max_cpus = >> + qemu_opt_get_number(opts, "maxcpus", cpus); >> + >> + if (ms->smp.max_cpus < cpus) { >> + error_report("maxcpus must be equal to or greater than smp"); >> + exit(1); >> + } >> + >> + if (sockets * dies * cores * threads > ms->smp.max_cpus) { >> + error_report("cpu topology: " >> + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " >> + "maxcpus (%u)", >> + sockets, dies, cores, threads, >> + ms->smp.max_cpus); >> + exit(1); >> + } >> + >> + if (sockets * dies * cores * threads != ms->smp.max_cpus) { >> + warn_report("Invalid CPU topology deprecated: " >> + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " >> + "!= maxcpus (%u)", >> + sockets, dies, cores, threads, >> + ms->smp.max_cpus); >> + } >> + >> + ms->smp.cpus = cpus; >> + ms->smp.cores = cores; >> + ms->smp.threads = threads; >> + x86ms->smp_dies = dies; >> + } >> + >> + if (ms->smp.cpus > 1) { >> + Error *blocker = NULL; >> + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); >> + replay_add_blocker(blocker); >> + } >> +} >> + >> +void x86_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) >> +{ >> + X86MachineState *x86ms = X86_MACHINE(ms); >> + int64_t apic_id 
= x86_cpu_apic_id_from_index(x86ms, id); >> + Error *local_err = NULL; >> + >> + if (id < 0) { >> + error_setg(errp, "Invalid CPU id: %" PRIi64, id); >> + return; >> + } >> + >> + if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { >> + error_setg(errp, "Unable to add CPU: %" PRIi64 >> + ", resulting APIC ID (%" PRIi64 ") is too large", >> + id, apic_id); >> + return; >> + } >> + >> + x86_new_cpu(X86_MACHINE(ms), apic_id, &local_err); >> + if (local_err) { >> + error_propagate(errp, local_err); >> + return; >> + } >> +} >> + >> +void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) >> +{ >> + int i; >> + const CPUArchIdList *possible_cpus; >> + MachineState *ms = MACHINE(x86ms); >> + MachineClass *mc = MACHINE_GET_CLASS(x86ms); >> + >> + x86_cpu_set_default_version(default_cpu_version); >> + >> + /* Calculates the limit to CPU APIC ID values >> + * >> + * Limit for the APIC ID value, so that all >> + * CPU APIC IDs are < x86ms->apic_id_limit. >> + * >> + * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). 
>> + */ >> + x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, >> + ms->smp.max_cpus - 1) + 1; >> + possible_cpus = mc->possible_cpu_arch_ids(ms); >> + for (i = 0; i < ms->smp.cpus; i++) { >> + x86_new_cpu(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); >> + } >> +} >> + >> +void x86_nmi(NMIState *n, int cpu_index, Error **errp) >> +{ >> + /* cpu index isn't used */ >> + CPUState *cs; >> + >> + CPU_FOREACH(cs) { >> + X86CPU *cpu = X86_CPU(cs); >> + >> + if (!cpu->apic_state) { >> + cpu_interrupt(cs, CPU_INTERRUPT_NMI); >> + } else { >> + apic_deliver_nmi(cpu->apic_state); >> + } >> + } >> +} >> + >> +CpuInstanceProperties >> +x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) >> +{ >> + MachineClass *mc = MACHINE_GET_CLASS(ms); >> + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); >> + >> + assert(cpu_index < possible_cpus->len); >> + return possible_cpus->cpus[cpu_index].props; >> +} >> + >> +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) >> +{ >> + X86CPUTopoInfo topo; >> + X86MachineState *x86ms = X86_MACHINE(ms); >> + >> + assert(idx < ms->possible_cpus->len); >> + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, >> + x86ms->smp_dies, ms->smp.cores, >> + ms->smp.threads, &topo); >> + return topo.pkg_id % ms->numa_state->num_nodes; >> +} >> + >> +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) >> +{ >> + X86MachineState *x86ms = X86_MACHINE(ms); >> + int i; >> + unsigned int max_cpus = ms->smp.max_cpus; >> + >> + if (ms->possible_cpus) { >> + /* >> + * make sure that max_cpus hasn't changed since the first use, i.e. 
>> + * -smp hasn't been parsed after it >> + */ >> + assert(ms->possible_cpus->len == max_cpus); >> + return ms->possible_cpus; >> + } >> + >> + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + >> + sizeof(CPUArchId) * max_cpus); >> + ms->possible_cpus->len = max_cpus; >> + for (i = 0; i < ms->possible_cpus->len; i++) { >> + X86CPUTopoInfo topo; >> + >> + ms->possible_cpus->cpus[i].type = ms->cpu_type; >> + ms->possible_cpus->cpus[i].vcpus_count = 1; >> + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(x86ms, i); >> + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, >> + x86ms->smp_dies, ms->smp.cores, >> + ms->smp.threads, &topo); >> + ms->possible_cpus->cpus[i].props.has_socket_id = true; >> + ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; >> + if (x86ms->smp_dies > 1) { >> + ms->possible_cpus->cpus[i].props.has_die_id = true; >> + ms->possible_cpus->cpus[i].props.die_id = topo.die_id; >> + } >> + ms->possible_cpus->cpus[i].props.has_core_id = true; >> + ms->possible_cpus->cpus[i].props.core_id = topo.core_id; >> + ms->possible_cpus->cpus[i].props.has_thread_id = true; >> + ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; >> + } >> + return ms->possible_cpus; >> +} >> + >> +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) >> +{ >> + char *filename; >> + MemoryRegion *bios, *isa_bios; >> + int bios_size, isa_bios_size; >> + int ret; >> + >> + /* BIOS load */ >> + if (bios_name == NULL) { >> + bios_name = BIOS_FILENAME; >> + } >> + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); >> + if (filename) { >> + bios_size = get_image_size(filename); >> + } else { >> + bios_size = -1; >> + } >> + if (bios_size <= 0 || >> + (bios_size % 65536) != 0) { >> + goto bios_error; >> + } >> + bios = g_malloc(sizeof(*bios)); >> + memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); >> + if (!isapc_ram_fw) { >> + memory_region_set_readonly(bios, true); >> + } >> + ret = 
rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); >> + if (ret != 0) { >> + bios_error: >> + fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); >> + exit(1); >> + } >> + g_free(filename); >> + >> + /* map the last 128KB of the BIOS in ISA space */ >> + isa_bios_size = MIN(bios_size, 128 * KiB); >> + isa_bios = g_malloc(sizeof(*isa_bios)); >> + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, >> + bios_size - isa_bios_size, isa_bios_size); >> + memory_region_add_subregion_overlap(rom_memory, >> + 0x100000 - isa_bios_size, >> + isa_bios, >> + 1); >> + if (!isapc_ram_fw) { >> + memory_region_set_readonly(isa_bios, true); >> + } >> + >> + /* map all the bios at the top of memory */ >> + memory_region_add_subregion(rom_memory, >> + (uint32_t)(-bios_size), >> + bios); >> +} >> + >> +static long get_file_size(FILE *f) >> +{ >> + long where, size; >> + >> + /* XXX: on Unix systems, using fstat() probably makes more sense */ >> + >> + where = ftell(f); >> + fseek(f, 0, SEEK_END); >> + size = ftell(f); >> + fseek(f, where, SEEK_SET); >> + >> + return size; >> +} >> + >> +struct setup_data { >> + uint64_t next; >> + uint32_t type; >> + uint32_t len; >> + uint8_t data[0]; >> +} __attribute__((packed)); >> + >> +void load_linux(X86MachineState *x86ms, >> + FWCfgState *fw_cfg, >> + unsigned acpi_data_size, >> + bool linuxboot_dma_enabled, >> + bool pvh_enabled) >> +{ >> + uint16_t protocol; >> + int setup_size, kernel_size, cmdline_size; >> + int dtb_size, setup_data_offset; >> + uint32_t initrd_max; >> + uint8_t header[8192], *setup, *kernel; >> + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; >> + FILE *f; >> + char *vmode; >> + MachineState *machine = MACHINE(x86ms); >> + struct setup_data *setup_data; >> + const char *kernel_filename = machine->kernel_filename; >> + const char *initrd_filename = machine->initrd_filename; >> + const char *dtb_filename = machine->dtb; >> + const char *kernel_cmdline = machine->kernel_cmdline; >> 
+ >> + /* Align to 16 bytes as a paranoia measure */ >> + cmdline_size = (strlen(kernel_cmdline)+16) & ~15; >> + >> + /* load the kernel header */ >> + f = fopen(kernel_filename, "rb"); >> + if (!f || !(kernel_size = get_file_size(f)) || >> + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != >> + MIN(ARRAY_SIZE(header), kernel_size)) { >> + fprintf(stderr, "qemu: could not load kernel '%s': %s\n", >> + kernel_filename, strerror(errno)); >> + exit(1); >> + } >> + >> + /* kernel protocol version */ >> +#if 0 >> + fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); >> +#endif >> + if (ldl_p(header+0x202) == 0x53726448) { >> + protocol = lduw_p(header+0x206); >> + } else { >> + size_t pvh_start_addr; >> + uint32_t mh_load_addr = 0; >> + uint32_t elf_kernel_size = 0; >> + /* >> + * This could be a multiboot kernel. If it is, let's stop treating it >> + * like a Linux kernel. >> + * Note: some multiboot images could be in the ELF format (the same of >> + * PVH), so we try multiboot first since we check the multiboot magic >> + * header before to load it. >> + */ >> + if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, >> + kernel_cmdline, kernel_size, header)) { >> + return; >> + } >> + /* >> + * Check if the file is an uncompressed kernel file (ELF) and load it, >> + * saving the PVH entry point used by the x86/HVM direct boot ABI. >> + * If load_elfboot() is successful, populate the fw_cfg info. 
>> + */ >> + if (pvh_enabled && >> + pvh_load_elfboot(kernel_filename, >> + &mh_load_addr, &elf_kernel_size)) { >> + fclose(f); >> + >> + pvh_start_addr = pvh_get_start_addr(); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, >> + strlen(kernel_cmdline) + 1); >> + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, >> + header, sizeof(header)); >> + >> + /* load initrd */ >> + if (initrd_filename) { >> + GMappedFile *mapped_file; >> + gsize initrd_size; >> + gchar *initrd_data; >> + GError *gerr = NULL; >> + >> + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> + if (!mapped_file) { >> + fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> + initrd_filename, gerr->message); >> + exit(1); >> + } >> + x86ms->initrd_mapped_file = mapped_file; >> + >> + initrd_data = g_mapped_file_get_contents(mapped_file); >> + initrd_size = g_mapped_file_get_length(mapped_file); >> + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; >> + if (initrd_size >= initrd_max) { >> + fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> + "(max: %"PRIu32", need %"PRId64")\n", >> + initrd_max, (uint64_t)initrd_size); >> + exit(1); >> + } >> + >> + initrd_addr = (initrd_max - initrd_size) & ~4095; >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, >> + initrd_size); >> + } >> + >> + option_rom[nb_option_roms].bootindex = 0; >> + option_rom[nb_option_roms].name = "pvh.bin"; >> + nb_option_roms++; >> + >> + return; >> + } >> + protocol = 0; >> + } >> + >> + if (protocol < 0x200 || !(header[0x211] & 0x01)) { >> + /* Low kernel */ >> + real_addr = 0x90000; >> + cmdline_addr = 0x9a000 - cmdline_size; >> + prot_addr = 0x10000; >> + } else if (protocol < 0x202) { >> + /* High but ancient kernel */ >> + real_addr = 0x90000; >> + cmdline_addr = 0x9a000 - cmdline_size; >> + prot_addr = 0x100000; >> + } else { >> + /* High and recent kernel */ >> + real_addr = 0x10000; >> + cmdline_addr = 0x20000; >> + prot_addr = 0x100000; >> + } >> + >> +#if 0 >> + fprintf(stderr, >> + "qemu: real_addr = 0x" TARGET_FMT_plx "\n" >> + "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" >> + "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", >> + real_addr, >> + cmdline_addr, >> + prot_addr); >> +#endif >> + >> + /* highest address for loading the initrd */ >> + if (protocol >= 0x20c && >> + lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { >> + /* >> + * Linux has supported initrd up to 4 GB for a very long time (2007, >> + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), >> + * though it only sets initrd_max to 2 GB to "work around bootloader >> + * bugs". Luckily, QEMU firmware(which does something like bootloader) >> + * has supported this. >> + * >> + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can >> + * be loaded into any address. 
>> + * >> + * In addition, initrd_max is uint32_t simply because QEMU doesn't >> + * support the 64-bit boot protocol (specifically the ext_ramdisk_image >> + * field). >> + * >> + * Therefore here just limit initrd_max to UINT32_MAX simply as well. >> + */ >> + initrd_max = UINT32_MAX; >> + } else if (protocol >= 0x203) { >> + initrd_max = ldl_p(header+0x22c); >> + } else { >> + initrd_max = 0x37ffffff; >> + } >> + >> + if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { >> + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; >> + } >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); >> + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); >> + >> + if (protocol >= 0x202) { >> + stl_p(header+0x228, cmdline_addr); >> + } else { >> + stw_p(header+0x20, 0xA33F); >> + stw_p(header+0x22, cmdline_addr-real_addr); >> + } >> + >> + /* handle vga= parameter */ >> + vmode = strstr(kernel_cmdline, "vga="); >> + if (vmode) { >> + unsigned int video_mode; >> + /* skip "vga=" */ >> + vmode += 4; >> + if (!strncmp(vmode, "normal", 6)) { >> + video_mode = 0xffff; >> + } else if (!strncmp(vmode, "ext", 3)) { >> + video_mode = 0xfffe; >> + } else if (!strncmp(vmode, "ask", 3)) { >> + video_mode = 0xfffd; >> + } else { >> + video_mode = strtol(vmode, NULL, 0); >> + } >> + stw_p(header+0x1fa, video_mode); >> + } >> + >> + /* loader type */ >> + /* High nybble = B reserved for QEMU; low nybble is revision number. >> + If this code is substantially changed, you may want to consider >> + incrementing the revision. 
*/ >> + if (protocol >= 0x200) { >> + header[0x210] = 0xB0; >> + } >> + /* heap */ >> + if (protocol >= 0x201) { >> + header[0x211] |= 0x80; /* CAN_USE_HEAP */ >> + stw_p(header+0x224, cmdline_addr-real_addr-0x200); >> + } >> + >> + /* load initrd */ >> + if (initrd_filename) { >> + GMappedFile *mapped_file; >> + gsize initrd_size; >> + gchar *initrd_data; >> + GError *gerr = NULL; >> + >> + if (protocol < 0x200) { >> + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); >> + exit(1); >> + } >> + >> + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); >> + if (!mapped_file) { >> + fprintf(stderr, "qemu: error reading initrd %s: %s\n", >> + initrd_filename, gerr->message); >> + exit(1); >> + } >> + x86ms->initrd_mapped_file = mapped_file; >> + >> + initrd_data = g_mapped_file_get_contents(mapped_file); >> + initrd_size = g_mapped_file_get_length(mapped_file); >> + if (initrd_size >= initrd_max) { >> + fprintf(stderr, "qemu: initrd is too large, cannot support." 
>> + "(max: %"PRIu32", need %"PRId64")\n", >> + initrd_max, (uint64_t)initrd_size); >> + exit(1); >> + } >> + >> + initrd_addr = (initrd_max-initrd_size) & ~4095; >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); >> + >> + stl_p(header+0x218, initrd_addr); >> + stl_p(header+0x21c, initrd_size); >> + } >> + >> + /* load kernel and setup */ >> + setup_size = header[0x1f1]; >> + if (setup_size == 0) { >> + setup_size = 4; >> + } >> + setup_size = (setup_size+1)*512; >> + if (setup_size > kernel_size) { >> + fprintf(stderr, "qemu: invalid kernel header\n"); >> + exit(1); >> + } >> + kernel_size -= setup_size; >> + >> + setup = g_malloc(setup_size); >> + kernel = g_malloc(kernel_size); >> + fseek(f, 0, SEEK_SET); >> + if (fread(setup, 1, setup_size, f) != setup_size) { >> + fprintf(stderr, "fread() failed\n"); >> + exit(1); >> + } >> + if (fread(kernel, 1, kernel_size, f) != kernel_size) { >> + fprintf(stderr, "fread() failed\n"); >> + exit(1); >> + } >> + fclose(f); >> + >> + /* append dtb to kernel */ >> + if (dtb_filename) { >> + if (protocol < 0x209) { >> + fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); >> + exit(1); >> + } >> + >> + dtb_size = get_image_size(dtb_filename); >> + if (dtb_size <= 0) { >> + fprintf(stderr, "qemu: error reading dtb %s: %s\n", >> + dtb_filename, strerror(errno)); >> + exit(1); >> + } >> + >> + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); >> + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; >> + kernel = g_realloc(kernel, kernel_size); >> + >> + stq_p(header+0x250, prot_addr + setup_data_offset); >> + >> + setup_data = (struct setup_data *)(kernel + setup_data_offset); >> + setup_data->next = 0; >> + setup_data->type = cpu_to_le32(SETUP_DTB); >> + setup_data->len = cpu_to_le32(dtb_size); >> + >> + load_image_size(dtb_filename, 
setup_data->data, dtb_size); >> + } >> + >> + memcpy(setup, header, MIN(sizeof(header), setup_size)); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); >> + >> + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); >> + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); >> + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); >> + >> + option_rom[nb_option_roms].bootindex = 0; >> + option_rom[nb_option_roms].name = "linuxboot.bin"; >> + if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { >> + option_rom[nb_option_roms].name = "linuxboot_dma.bin"; >> + } >> + nb_option_roms++; >> +} >> + >> +static void x86_machine_get_max_ram_below_4g(Object *obj, Visitor *v, >> + const char *name, void *opaque, >> + Error **errp) >> +{ >> + X86MachineState *x86ms = X86_MACHINE(obj); >> + uint64_t value = x86ms->max_ram_below_4g; >> + >> + visit_type_size(v, name, &value, errp); >> +} >> + >> +static void x86_machine_set_max_ram_below_4g(Object *obj, Visitor *v, >> + const char *name, void *opaque, >> + Error **errp) >> +{ >> + X86MachineState *x86ms = X86_MACHINE(obj); >> + Error *error = NULL; >> + uint64_t value; >> + >> + visit_type_size(v, name, &value, &error); >> + if (error) { >> + error_propagate(errp, error); >> + return; >> + } >> + if (value > 4 * GiB) { >> + error_setg(&error, >> + "Machine option 'max-ram-below-4g=%"PRIu64 >> + "' expects size less than or equal to 4G", value); >> + error_propagate(errp, error); >> + return; >> + } >> + >> + if (value < 1 * MiB) { >> + warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," >> + "BIOS may not work with less than 1MiB", value); >> + } >> + >> + x86ms->max_ram_below_4g = value; >> +} >> + >> +static void x86_machine_initfn(Object *obj) >> +{ >> + X86MachineState *x86ms = X86_MACHINE(obj); >> + >> + x86ms->max_ram_below_4g = 0; /* use 
default */ >> + x86ms->smp_dies = 1; >> +} >> + >> +static void x86_machine_class_init(ObjectClass *oc, void *data) >> +{ >> + MachineClass *mc = MACHINE_CLASS(oc); >> + >> + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; >> + mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; >> + mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; >> + >> + object_class_property_add(oc, X86_MACHINE_MAX_RAM_BELOW_4G, "size", >> + x86_machine_get_max_ram_below_4g, x86_machine_set_max_ram_below_4g, >> + NULL, NULL, &error_abort); >> + >> + object_class_property_set_description(oc, X86_MACHINE_MAX_RAM_BELOW_4G, >> + "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); >> +} >> + >> +static const TypeInfo x86_machine_info = { >> + .name = TYPE_X86_MACHINE, >> + .parent = TYPE_MACHINE, >> + .abstract = true, >> + .instance_size = sizeof(X86MachineState), >> + .instance_init = x86_machine_initfn, >> + .class_size = sizeof(X86MachineClass), >> + .class_init = x86_machine_class_init, Don't we also have: .interfaces = (InterfaceInfo[]) { { TYPE_NMI }, { } }, >> +}; >> + >> +static void x86_machine_register_types(void) >> +{ >> + type_register_static(&x86_machine_info); >> +} >> + >> +type_init(x86_machine_register_types) >> diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c >> index 1ede055387..e621dde6c3 100644 >> --- a/hw/intc/ioapic.c >> +++ b/hw/intc/ioapic.c >> @@ -23,6 +23,7 @@ >> #include "qemu/osdep.h" >> #include "qapi/error.h" >> #include "monitor/monitor.h" >> +#include "hw/i386/x86.h" >> #include "hw/i386/pc.h" >> #include "hw/i386/apic.h" >> #include "hw/i386/ioapic.h" >> @@ -89,7 +90,7 @@ static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info) >> >> static void ioapic_service(IOAPICCommonState *s) >> { >> - AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as; >> + AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as; >> struct ioapic_entry_info info; >> uint8_t i; >> uint32_t mask; >> 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h >> index 062feeb69e..de28d55e5c 100644 >> --- a/include/hw/i386/pc.h >> +++ b/include/hw/i386/pc.h >> @@ -3,6 +3,7 @@ >> >> #include "exec/memory.h" >> #include "hw/boards.h" >> +#include "hw/i386/x86.h" >> #include "hw/isa/isa.h" >> #include "hw/block/fdc.h" >> #include "hw/block/flash.h" >> @@ -27,7 +28,7 @@ >> */ >> struct PCMachineState { >> /*< private >*/ >> - MachineState parent_obj; >> + X86MachineState parent_obj; >> >> /* <public> */ >> >> @@ -36,15 +37,10 @@ struct PCMachineState { >> >> /* Pointers to devices and objects: */ >> HotplugHandler *acpi_dev; >> - ISADevice *rtc; >> PCIBus *bus; >> - FWCfgState *fw_cfg; >> - qemu_irq *gsi; >> PFlashCFI01 *flash[2]; >> - GMappedFile *initrd_mapped_file; >> >> /* Configuration options: */ >> - uint64_t max_ram_below_4g; >> OnOffAuto vmport; >> OnOffAuto smm; >> >> @@ -53,27 +49,13 @@ struct PCMachineState { >> bool sata_enabled; >> bool pit_enabled; >> >> - /* RAM information (sizes, addresses, configuration): */ >> - ram_addr_t below_4g_mem_size, above_4g_mem_size; >> - >> - /* CPU and apic information: */ >> - bool apic_xrupt_override; >> - unsigned apic_id_limit; >> - uint16_t boot_cpus; >> - unsigned smp_dies; >> - >> /* NUMA information: */ >> uint64_t numa_nodes; >> uint64_t *node_mem; >> - >> - /* Address space used by IOAPIC device. All IOAPIC interrupts >> - * will be translated to MSI messages in the address space. 
*/ >> - AddressSpace *ioapic_as; >> }; >> >> #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" >> #define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size" >> -#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" >> #define PC_MACHINE_VMPORT "vmport" >> #define PC_MACHINE_SMM "smm" >> #define PC_MACHINE_SMBUS "smbus" >> @@ -139,9 +121,6 @@ typedef struct PCMachineClass { >> >> /* use PVH to load kernels that support this feature */ >> bool pvh_enabled; >> - >> - /* Enables contiguous-apic-ID mode */ >> - bool compat_apic_id_mode; >> } PCMachineClass; >> >> #define TYPE_PC_MACHINE "generic-pc-machine" >> @@ -193,10 +172,6 @@ bool pc_machine_is_smm_enabled(PCMachineState *pcms); >> void pc_register_ferr_irq(qemu_irq irq); >> void pc_acpi_smi_interrupt(void *opaque, int irq, int level); >> >> -void pc_cpus_init(PCMachineState *pcms); >> -void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp); >> -void pc_smp_parse(MachineState *ms, QemuOpts *opts); >> - >> void pc_guest_info_init(PCMachineState *pcms); >> >> #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" >> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h >> new file mode 100644 >> index 0000000000..5980090b29 >> --- /dev/null >> +++ b/include/hw/i386/x86.h >> @@ -0,0 +1,97 @@ >> +/* >> + * Copyright (c) 2019 Red Hat, Inc. >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms and conditions of the GNU General Public License, >> + * version 2 or later, as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope it will be useful, but WITHOUT >> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or >> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> + * more details. >> + * >> + * You should have received a copy of the GNU General Public License along with >> + * this program. If not, see <http://www.gnu.org/licenses/>. 
>> + */ >> + >> +#ifndef HW_I386_X86_H >> +#define HW_I386_X86_H >> + >> +#include "qemu-common.h" >> +#include "exec/hwaddr.h" >> +#include "qemu/notify.h" >> + >> +#include "hw/boards.h" >> +#include "hw/nmi.h" >> + >> +typedef struct { >> + /*< private >*/ >> + MachineClass parent; >> + >> + /*< public >*/ >> + >> + /* Enables contiguous-apic-ID mode */ >> + bool compat_apic_id_mode; >> +} X86MachineClass; >> + >> +typedef struct { >> + /*< private >*/ >> + MachineState parent; >> + >> + /*< public >*/ >> + >> + /* Pointers to devices and objects: */ >> + ISADevice *rtc; >> + FWCfgState *fw_cfg; >> + qemu_irq *gsi; >> + GMappedFile *initrd_mapped_file; >> + >> + /* Configuration options: */ >> + uint64_t max_ram_below_4g; >> + >> + /* RAM information (sizes, addresses, configuration): */ >> + ram_addr_t below_4g_mem_size, above_4g_mem_size; >> + >> + /* CPU and apic information: */ >> + bool apic_xrupt_override; >> + unsigned apic_id_limit; >> + uint16_t boot_cpus; >> + unsigned smp_dies; >> + >> + /* Address space used by IOAPIC device. All IOAPIC interrupts >> + * will be translated to MSI messages in the address space. */ >> + AddressSpace *ioapic_as; >> +} X86MachineState; >> + >> +#define X86_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" >> + >> +#define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") Maybe we should name it TYPE_X86_BASE_MACHINE (or COMMON?) since it is not a real machine, but an abstract base class. 
>> +#define X86_MACHINE(obj) \ >> + OBJECT_CHECK(X86MachineState, (obj), TYPE_X86_MACHINE) >> +#define X86_MACHINE_GET_CLASS(obj) \ >> + OBJECT_GET_CLASS(X86MachineClass, obj, TYPE_X86_MACHINE) >> +#define X86_MACHINE_CLASS(class) \ >> + OBJECT_CLASS_CHECK(X86MachineClass, class, TYPE_X86_MACHINE) >> + >> +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, >> + unsigned int cpu_index); >> + >> +void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); >> +void x86_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp); >> +void x86_smp_parse(MachineState *ms, QemuOpts *opts); >> +void x86_nmi(NMIState *n, int cpu_index, Error **errp); >> + >> +CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, >> + unsigned cpu_index); >> +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); >> +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); >> + >> +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw); >> + >> +void load_linux(X86MachineState *x86ms, > > Maybe rename to x86_load_linux()? > >> + FWCfgState *fw_cfg, >> + unsigned acpi_data_size, >> + bool linuxboot_dma_enabled, >> + bool pvh_enabled); >> + >> +#endif >> > > Patch looks good, however I'd split it as: > > 1/ rename functions x86_* > 2/ export functions, add "hw/i386/x86.h" > 3/ move functions to hw/i386/x86.c > 4/ add/use X86MachineState > > Anyhow if the maintainer is happy as is: > Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> >
diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c index 6e8293aac9..3ac2045a95 100644 --- a/hw/acpi/cpu_hotplug.c +++ b/hw/acpi/cpu_hotplug.c @@ -128,7 +128,7 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, Aml *one = aml_int(1); MachineClass *mc = MACHINE_GET_CLASS(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); - PCMachineState *pcms = PC_MACHINE(machine); + X86MachineState *x86ms = X86_MACHINE(machine); /* * _MAT method - creates an madt apic buffer @@ -236,9 +236,9 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, /* The current AML generator can cover the APIC ID range [0..255], * inclusive, for VCPU hotplug. */ QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256); - if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { + if (x86ms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { error_report("max_cpus is too large. APIC ID of last CPU is %u", - pcms->apic_id_limit - 1); + x86ms->apic_id_limit - 1); exit(1); } @@ -315,8 +315,8 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, * ith up to 255 elements. Windows guests up to win2k8 fail when * VarPackageOp is used. */ - pkg = pcms->apic_id_limit <= 255 ? aml_package(pcms->apic_id_limit) : - aml_varpackage(pcms->apic_id_limit); + pkg = x86ms->apic_id_limit <= 255 ? 
aml_package(x86ms->apic_id_limit) : + aml_varpackage(x86ms->apic_id_limit); for (i = 0, apic_idx = 0; i < apic_ids->len; i++) { int apic_id = apic_ids->cpus[i].arch_id; diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 149712db07..5b4b3a672e 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -1,6 +1,7 @@ obj-$(CONFIG_KVM) += kvm/ obj-y += multiboot.o obj-y += pvh.o +obj-y += x86.o obj-y += pc.o obj-y += e820.o obj-$(CONFIG_I440FX) += pc_piix.o diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e54e571a75..76e18d3285 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -29,6 +29,7 @@ #include "hw/pci/pci.h" #include "hw/core/cpu.h" #include "target/i386/cpu.h" +#include "hw/i386/x86.h" #include "hw/misc/pvpanic.h" #include "hw/timer/hpet.h" #include "hw/acpi/acpi-defs.h" @@ -361,6 +362,7 @@ static void build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) { MachineClass *mc = MACHINE_GET_CLASS(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); int madt_start = table_data->len; AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); @@ -390,7 +392,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); io_apic->interrupt = cpu_to_le32(0); - if (pcms->apic_xrupt_override) { + if (x86ms->apic_xrupt_override) { intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; intsrcovr->length = sizeof(*intsrcovr); @@ -1817,8 +1819,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, CrsRangeEntry *entry; Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; CrsRangeSet crs_range_set; - PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); + X86MachineState *x86ms = X86_MACHINE(machine); AcpiMcfgInfo mcfg; uint32_t nr_mem = 
machine->ram_slots; int root_bus_limit = 0xFF; @@ -2083,7 +2085,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, * with half of the 16-bit control register. Hence, the total size * of the i/o region used is FW_CFG_CTL_SIZE; when using DMA, the * DMA control register is located at FW_CFG_DMA_IO_BASE + 4 */ - uint8_t io_size = object_property_get_bool(OBJECT(pcms->fw_cfg), + uint8_t io_size = object_property_get_bool(OBJECT(x86ms->fw_cfg), "dma_enabled", NULL) ? ROUND_UP(FW_CFG_CTL_SIZE, 4) + sizeof(dma_addr_t) : FW_CFG_CTL_SIZE; @@ -2318,6 +2320,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); PCMachineState *pcms = PC_MACHINE(machine); + X86MachineState *x86ms = X86_MACHINE(machine); ram_addr_t hotplugabble_address_space_size = object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, NULL); @@ -2386,16 +2389,16 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } /* Cut out the ACPI_PCI hole */ - if (mem_base <= pcms->below_4g_mem_size && - next_base > pcms->below_4g_mem_size) { - mem_len -= next_base - pcms->below_4g_mem_size; + if (mem_base <= x86ms->below_4g_mem_size && + next_base > x86ms->below_4g_mem_size) { + mem_len -= next_base - x86ms->below_4g_mem_size; if (mem_len > 0) { numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } mem_base = 1ULL << 32; - mem_len = next_base - pcms->below_4g_mem_size; + mem_len = next_base - x86ms->below_4g_mem_size; next_base = mem_base + mem_len; } @@ -2614,6 +2617,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + X86MachineState *x86ms = X86_MACHINE(machine); GArray *table_offsets; unsigned facs, dsdt, rsdt, fadt; AcpiPmInfo pm; @@ -2775,7 
+2779,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) */ int legacy_aml_len = pcmc->legacy_acpi_table_size + - ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit; + ACPI_BUILD_LEGACY_CPU_AML_SIZE * x86ms->apic_id_limit; int legacy_table_size = ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, ACPI_BUILD_ALIGN_SIZE); @@ -2865,13 +2869,14 @@ void acpi_setup(void) { PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); AcpiBuildTables tables; AcpiBuildState *build_state; Object *vmgenid_dev; TPMIf *tpm; static FwCfgTPMConfig tpm_config; - if (!pcms->fw_cfg) { + if (!x86ms->fw_cfg) { ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); return; } @@ -2902,7 +2907,7 @@ void acpi_setup(void) acpi_add_rom_blob(acpi_build_update, build_state, tables.linker->cmd_blob, "etc/table-loader", 0); - fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, + fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); tpm = tpm_find(); @@ -2912,13 +2917,13 @@ void acpi_setup(void) .tpm_version = tpm_get_version(tpm), .tpmppi_version = TPM_PPI_VERSION_1_30 }; - fw_cfg_add_file(pcms->fw_cfg, "etc/tpm/config", + fw_cfg_add_file(x86ms->fw_cfg, "etc/tpm/config", &tpm_config, sizeof tpm_config); } vmgenid_dev = find_vmgenid_dev(); if (vmgenid_dev) { - vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg, + vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), x86ms->fw_cfg, tables.vmgenid); } @@ -2931,7 +2936,7 @@ void acpi_setup(void) uint32_t rsdp_size = acpi_data_len(tables.rsdp); build_state->rsdp = g_memdup(tables.rsdp->data, rsdp_size); - fw_cfg_add_file_callback(pcms->fw_cfg, ACPI_BUILD_RSDP_FILE, + fw_cfg_add_file_callback(x86ms->fw_cfg, ACPI_BUILD_RSDP_FILE, acpi_build_update, NULL, build_state, build_state->rsdp, rsdp_size, true); build_state->rsdp_mr = NULL; diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 
08884523e2..bb3b5b4563 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -21,6 +21,7 @@ */ #include "qemu/osdep.h" +#include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/pci/msi.h" #include "hw/pci/pci_bus.h" @@ -1537,6 +1538,7 @@ static void amdvi_realize(DeviceState *dev, Error **err) X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); + X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, @@ -1565,7 +1567,7 @@ static void amdvi_realize(DeviceState *dev, Error **err) } /* Pseudo address space under root PCI bus. */ - pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); + x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); /* set up MMIO */ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 75ca6f9c70..21f091c654 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -29,6 +29,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" +#include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/i386/apic-msidef.h" #include "hw/boards.h" @@ -3703,6 +3704,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); + X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); @@ -3743,7 +3745,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); pci_setup_iommu(bus, vtd_host_dma_iommu, dev); /* Pseudo address space under root PCI bus. 
*/ - pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); + x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); } static void vtd_class_init(ObjectClass *klass, void *data) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 3920aa7e85..d18b461f01 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/char/serial.h" #include "hw/char/parallel.h" @@ -676,6 +677,7 @@ void pc_cmos_init(PCMachineState *pcms, BusState *idebus0, BusState *idebus1, ISADevice *s) { + X86MachineState *x86ms = X86_MACHINE(pcms); int val; static pc_cmos_init_late_arg arg; @@ -683,12 +685,12 @@ void pc_cmos_init(PCMachineState *pcms, /* memory size */ /* base memory (first MiB) */ - val = MIN(pcms->below_4g_mem_size / KiB, 640); + val = MIN(x86ms->below_4g_mem_size / KiB, 640); rtc_set_memory(s, 0x15, val); rtc_set_memory(s, 0x16, val >> 8); /* extended memory (next 64MiB) */ - if (pcms->below_4g_mem_size > 1 * MiB) { - val = (pcms->below_4g_mem_size - 1 * MiB) / KiB; + if (x86ms->below_4g_mem_size > 1 * MiB) { + val = (x86ms->below_4g_mem_size - 1 * MiB) / KiB; } else { val = 0; } @@ -699,8 +701,8 @@ void pc_cmos_init(PCMachineState *pcms, rtc_set_memory(s, 0x30, val); rtc_set_memory(s, 0x31, val >> 8); /* memory between 16MiB and 4GiB */ - if (pcms->below_4g_mem_size > 16 * MiB) { - val = (pcms->below_4g_mem_size - 16 * MiB) / (64 * KiB); + if (x86ms->below_4g_mem_size > 16 * MiB) { + val = (x86ms->below_4g_mem_size - 16 * MiB) / (64 * KiB); } else { val = 0; } @@ -709,20 +711,20 @@ void pc_cmos_init(PCMachineState *pcms, rtc_set_memory(s, 0x34, val); rtc_set_memory(s, 0x35, val >> 8); /* memory above 4GiB */ - val = pcms->above_4g_mem_size / 65536; + val = x86ms->above_4g_mem_size / 65536; rtc_set_memory(s, 0x5b, val); rtc_set_memory(s, 0x5c, val >> 8); rtc_set_memory(s, 0x5d, val >> 16); - object_property_add_link(OBJECT(pcms), "rtc_state", 
+ object_property_add_link(OBJECT(x86ms), "rtc_state", TYPE_ISA_DEVICE, - (Object **)&pcms->rtc, + (Object **)&x86ms->rtc, object_property_allow_set_link, OBJ_PROP_LINK_STRONG, &error_abort); - object_property_set_link(OBJECT(pcms), OBJECT(s), + object_property_set_link(OBJECT(x86ms), OBJECT(s), "rtc_state", &error_abort); - set_boot_dev(s, MACHINE(pcms)->boot_order, &error_fatal); + set_boot_dev(s, MACHINE(x86ms)->boot_order, &error_fatal); val = 0; val |= 0x02; /* FPU is there */ @@ -863,35 +865,6 @@ static void handle_a20_line_change(void *opaque, int irq, int level) x86_cpu_set_a20(cpu, level); } -/* Calculates initial APIC ID for a specific CPU index - * - * Currently we need to be able to calculate the APIC ID from the CPU index - * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have - * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of - * all CPUs up to max_cpus. - */ -static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, - unsigned int cpu_index) -{ - MachineState *ms = MACHINE(pcms); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - uint32_t correct_id; - static bool warned; - - correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores, - ms->smp.threads, cpu_index); - if (pcmc->compat_apic_id_mode) { - if (cpu_index != correct_id && !warned && !qtest_enabled()) { - error_report("APIC IDs set in compatibility mode, " - "CPU topology won't match the configuration"); - warned = true; - } - return cpu_index; - } else { - return correct_id; - } -} - static void pc_build_smbios(PCMachineState *pcms) { uint8_t *smbios_tables, *smbios_anchor; @@ -899,6 +872,7 @@ static void pc_build_smbios(PCMachineState *pcms) struct smbios_phys_mem_area *mem_array; unsigned i, array_count; MachineState *ms = MACHINE(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); /* tell smbios about cpuid version and features */ @@ -906,7 +880,7 @@ static 
void pc_build_smbios(PCMachineState *pcms) smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); if (smbios_tables) { - fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, + fw_cfg_add_bytes(x86ms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, smbios_tables, smbios_tables_len); } @@ -927,9 +901,9 @@ static void pc_build_smbios(PCMachineState *pcms) g_free(mem_array); if (smbios_anchor) { - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-tables", + fw_cfg_add_file(x86ms->fw_cfg, "etc/smbios/smbios-tables", smbios_tables, smbios_tables_len); - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-anchor", + fw_cfg_add_file(x86ms->fw_cfg, "etc/smbios/smbios-anchor", smbios_anchor, smbios_anchor_len); } } @@ -942,10 +916,11 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) const CPUArchIdList *cpus; MachineClass *mc = MACHINE_GET_CLASS(pcms); MachineState *ms = MACHINE(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); int nb_numa_nodes = ms->numa_state->num_nodes; fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: * @@ -959,7 +934,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) * So for compatibility reasons with old BIOSes we are stuck with * "etc/max-cpus" actually being apic_id_limit */ - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)x86ms->apic_id_limit); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, acpi_tables, acpi_tables_len); @@ -972,374 +947,25 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) * of nodes, one word for each VCPU->node and one word for each node to * hold the amount of memory. 
*/ - numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); + numa_fw_cfg = g_new0(uint64_t, 1 + x86ms->apic_id_limit + nb_numa_nodes); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); cpus = mc->possible_cpu_arch_ids(MACHINE(pcms)); for (i = 0; i < cpus->len; i++) { unsigned int apic_id = cpus->cpus[i].arch_id; - assert(apic_id < pcms->apic_id_limit); + assert(apic_id < x86ms->apic_id_limit); numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[pcms->apic_id_limit + 1 + i] = + numa_fw_cfg[x86ms->apic_id_limit + 1 + i] = cpu_to_le64(ms->numa_state->nodes[i].node_mem); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + pcms->apic_id_limit + nb_numa_nodes) * + (1 + x86ms->apic_id_limit + nb_numa_nodes) * sizeof(*numa_fw_cfg)); return fw_cfg; } -static long get_file_size(FILE *f) -{ - long where, size; - - /* XXX: on Unix systems, using fstat() probably makes more sense */ - - where = ftell(f); - fseek(f, 0, SEEK_END); - size = ftell(f); - fseek(f, where, SEEK_SET); - - return size; -} - -struct setup_data { - uint64_t next; - uint32_t type; - uint32_t len; - uint8_t data[0]; -} __attribute__((packed)); - -static void load_linux(PCMachineState *pcms, - FWCfgState *fw_cfg) -{ - uint16_t protocol; - int setup_size, kernel_size, cmdline_size; - int dtb_size, setup_data_offset; - uint32_t initrd_max; - uint8_t header[8192], *setup, *kernel; - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; - FILE *f; - char *vmode; - MachineState *machine = MACHINE(pcms); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - struct setup_data *setup_data; - const char *kernel_filename = machine->kernel_filename; - const char *initrd_filename = machine->initrd_filename; - const char *dtb_filename = machine->dtb; - const char *kernel_cmdline = machine->kernel_cmdline; - - /* Align to 16 bytes as a paranoia measure */ - cmdline_size = (strlen(kernel_cmdline)+16) & ~15; - - /* load the 
kernel header */ - f = fopen(kernel_filename, "rb"); - if (!f || !(kernel_size = get_file_size(f)) || - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != - MIN(ARRAY_SIZE(header), kernel_size)) { - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", - kernel_filename, strerror(errno)); - exit(1); - } - - /* kernel protocol version */ -#if 0 - fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); -#endif - if (ldl_p(header+0x202) == 0x53726448) { - protocol = lduw_p(header+0x206); - } else { - size_t pvh_start_addr; - uint32_t mh_load_addr = 0; - uint32_t elf_kernel_size = 0; - /* - * This could be a multiboot kernel. If it is, let's stop treating it - * like a Linux kernel. - * Note: some multiboot images could be in the ELF format (the same of - * PVH), so we try multiboot first since we check the multiboot magic - * header before to load it. - */ - if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, - kernel_cmdline, kernel_size, header)) { - return; - } - /* - * Check if the file is an uncompressed kernel file (ELF) and load it, - * saving the PVH entry point used by the x86/HVM direct boot ABI. - * If load_elfboot() is successful, populate the fw_cfg info. 
- */ - if (pcmc->pvh_enabled && - pvh_load_elfboot(kernel_filename, - &mh_load_addr, &elf_kernel_size)) { - fclose(f); - - pvh_start_addr = pvh_get_start_addr(); - - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); - - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, - strlen(kernel_cmdline) + 1); - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); - - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, - header, sizeof(header)); - - /* load initrd */ - if (initrd_filename) { - GMappedFile *mapped_file; - gsize initrd_size; - gchar *initrd_data; - GError *gerr = NULL; - - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); - if (!mapped_file) { - fprintf(stderr, "qemu: error reading initrd %s: %s\n", - initrd_filename, gerr->message); - exit(1); - } - pcms->initrd_mapped_file = mapped_file; - - initrd_data = g_mapped_file_get_contents(mapped_file); - initrd_size = g_mapped_file_get_length(mapped_file); - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; - if (initrd_size >= initrd_max) { - fprintf(stderr, "qemu: initrd is too large, cannot support." 
- "(max: %"PRIu32", need %"PRId64")\n", - initrd_max, (uint64_t)initrd_size); - exit(1); - } - - initrd_addr = (initrd_max - initrd_size) & ~4095; - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, - initrd_size); - } - - option_rom[nb_option_roms].bootindex = 0; - option_rom[nb_option_roms].name = "pvh.bin"; - nb_option_roms++; - - return; - } - protocol = 0; - } - - if (protocol < 0x200 || !(header[0x211] & 0x01)) { - /* Low kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x10000; - } else if (protocol < 0x202) { - /* High but ancient kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x100000; - } else { - /* High and recent kernel */ - real_addr = 0x10000; - cmdline_addr = 0x20000; - prot_addr = 0x100000; - } - -#if 0 - fprintf(stderr, - "qemu: real_addr = 0x" TARGET_FMT_plx "\n" - "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" - "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", - real_addr, - cmdline_addr, - prot_addr); -#endif - - /* highest address for loading the initrd */ - if (protocol >= 0x20c && - lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { - /* - * Linux has supported initrd up to 4 GB for a very long time (2007, - * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), - * though it only sets initrd_max to 2 GB to "work around bootloader - * bugs". Luckily, QEMU firmware(which does something like bootloader) - * has supported this. - * - * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can - * be loaded into any address. - * - * In addition, initrd_max is uint32_t simply because QEMU doesn't - * support the 64-bit boot protocol (specifically the ext_ramdisk_image - * field). - * - * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
- */ - initrd_max = UINT32_MAX; - } else if (protocol >= 0x203) { - initrd_max = ldl_p(header+0x22c); - } else { - initrd_max = 0x37ffffff; - } - - if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) { - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; - } - - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); - - if (protocol >= 0x202) { - stl_p(header+0x228, cmdline_addr); - } else { - stw_p(header+0x20, 0xA33F); - stw_p(header+0x22, cmdline_addr-real_addr); - } - - /* handle vga= parameter */ - vmode = strstr(kernel_cmdline, "vga="); - if (vmode) { - unsigned int video_mode; - /* skip "vga=" */ - vmode += 4; - if (!strncmp(vmode, "normal", 6)) { - video_mode = 0xffff; - } else if (!strncmp(vmode, "ext", 3)) { - video_mode = 0xfffe; - } else if (!strncmp(vmode, "ask", 3)) { - video_mode = 0xfffd; - } else { - video_mode = strtol(vmode, NULL, 0); - } - stw_p(header+0x1fa, video_mode); - } - - /* loader type */ - /* High nybble = B reserved for QEMU; low nybble is revision number. - If this code is substantially changed, you may want to consider - incrementing the revision. 
*/ - if (protocol >= 0x200) { - header[0x210] = 0xB0; - } - /* heap */ - if (protocol >= 0x201) { - header[0x211] |= 0x80; /* CAN_USE_HEAP */ - stw_p(header+0x224, cmdline_addr-real_addr-0x200); - } - - /* load initrd */ - if (initrd_filename) { - GMappedFile *mapped_file; - gsize initrd_size; - gchar *initrd_data; - GError *gerr = NULL; - - if (protocol < 0x200) { - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); - exit(1); - } - - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); - if (!mapped_file) { - fprintf(stderr, "qemu: error reading initrd %s: %s\n", - initrd_filename, gerr->message); - exit(1); - } - pcms->initrd_mapped_file = mapped_file; - - initrd_data = g_mapped_file_get_contents(mapped_file); - initrd_size = g_mapped_file_get_length(mapped_file); - if (initrd_size >= initrd_max) { - fprintf(stderr, "qemu: initrd is too large, cannot support." - "(max: %"PRIu32", need %"PRId64")\n", - initrd_max, (uint64_t)initrd_size); - exit(1); - } - - initrd_addr = (initrd_max-initrd_size) & ~4095; - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); - - stl_p(header+0x218, initrd_addr); - stl_p(header+0x21c, initrd_size); - } - - /* load kernel and setup */ - setup_size = header[0x1f1]; - if (setup_size == 0) { - setup_size = 4; - } - setup_size = (setup_size+1)*512; - if (setup_size > kernel_size) { - fprintf(stderr, "qemu: invalid kernel header\n"); - exit(1); - } - kernel_size -= setup_size; - - setup = g_malloc(setup_size); - kernel = g_malloc(kernel_size); - fseek(f, 0, SEEK_SET); - if (fread(setup, 1, setup_size, f) != setup_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - if (fread(kernel, 1, kernel_size, f) != kernel_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - fclose(f); - - /* append dtb to kernel */ - if (dtb_filename) { - if (protocol < 0x209) { 
- fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); - exit(1); - } - - dtb_size = get_image_size(dtb_filename); - if (dtb_size <= 0) { - fprintf(stderr, "qemu: error reading dtb %s: %s\n", - dtb_filename, strerror(errno)); - exit(1); - } - - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); - kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; - kernel = g_realloc(kernel, kernel_size); - - stq_p(header+0x250, prot_addr + setup_data_offset); - - setup_data = (struct setup_data *)(kernel + setup_data_offset); - setup_data->next = 0; - setup_data->type = cpu_to_le32(SETUP_DTB); - setup_data->len = cpu_to_le32(dtb_size); - - load_image_size(dtb_filename, setup_data->data, dtb_size); - } - - memcpy(setup, header, MIN(sizeof(header), setup_size)); - - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); - - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); - - option_rom[nb_option_roms].bootindex = 0; - option_rom[nb_option_roms].name = "linuxboot.bin"; - if (pcmc->linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { - option_rom[nb_option_roms].name = "linuxboot_dma.bin"; - } - nb_option_roms++; -} - #define NE2000_NB_MAX 6 static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, @@ -1376,157 +1002,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } -static void pc_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp) -{ - Object *cpu = NULL; - Error *local_err = NULL; - CPUX86State *env = NULL; - - cpu = object_new(MACHINE(pcms)->cpu_type); - - env = &X86_CPU(cpu)->env; - env->nr_dies = pcms->smp_dies; - - object_property_set_uint(cpu, apic_id, "apic-id", &local_err); - object_property_set_bool(cpu, true, "realized", &local_err); - - 
object_unref(cpu); - error_propagate(errp, local_err); -} - -/* - * This function is very similar to smp_parse() - * in hw/core/machine.c but includes CPU die support. - */ -void pc_smp_parse(MachineState *ms, QemuOpts *opts) -{ - PCMachineState *pcms = PC_MACHINE(ms); - - if (opts) { - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); - unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); - unsigned dies = qemu_opt_get_number(opts, "dies", 1); - unsigned cores = qemu_opt_get_number(opts, "cores", 0); - unsigned threads = qemu_opt_get_number(opts, "threads", 0); - - /* compute missing values, prefer sockets over cores over threads */ - if (cpus == 0 || sockets == 0) { - cores = cores > 0 ? cores : 1; - threads = threads > 0 ? threads : 1; - if (cpus == 0) { - sockets = sockets > 0 ? sockets : 1; - cpus = cores * threads * dies * sockets; - } else { - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - sockets = ms->smp.max_cpus / (cores * threads * dies); - } - } else if (cores == 0) { - threads = threads > 0 ? threads : 1; - cores = cpus / (sockets * dies * threads); - cores = cores > 0 ? cores : 1; - } else if (threads == 0) { - threads = cpus / (cores * dies * sockets); - threads = threads > 0 ? 
threads : 1; - } else if (sockets * dies * cores * threads < cpus) { - error_report("cpu topology: " - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " - "smp_cpus (%u)", - sockets, dies, cores, threads, cpus); - exit(1); - } - - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - - if (ms->smp.max_cpus < cpus) { - error_report("maxcpus must be equal to or greater than smp"); - exit(1); - } - - if (sockets * dies * cores * threads > ms->smp.max_cpus) { - error_report("cpu topology: " - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " - "maxcpus (%u)", - sockets, dies, cores, threads, - ms->smp.max_cpus); - exit(1); - } - - if (sockets * dies * cores * threads != ms->smp.max_cpus) { - warn_report("Invalid CPU topology deprecated: " - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " - "!= maxcpus (%u)", - sockets, dies, cores, threads, - ms->smp.max_cpus); - } - - ms->smp.cpus = cpus; - ms->smp.cores = cores; - ms->smp.threads = threads; - pcms->smp_dies = dies; - } - - if (ms->smp.cpus > 1) { - Error *blocker = NULL; - error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); - replay_add_blocker(blocker); - } -} - -void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(ms); - int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id); - Error *local_err = NULL; - - if (id < 0) { - error_setg(errp, "Invalid CPU id: %" PRIi64, id); - return; - } - - if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { - error_setg(errp, "Unable to add CPU: %" PRIi64 - ", resulting APIC ID (%" PRIi64 ") is too large", - id, apic_id); - return; - } - - pc_new_cpu(PC_MACHINE(ms), apic_id, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } -} - -void pc_cpus_init(PCMachineState *pcms) -{ - int i; - const CPUArchIdList *possible_cpus; - MachineState *ms = MACHINE(pcms); - MachineClass *mc = MACHINE_GET_CLASS(pcms); - PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); - - 
x86_cpu_set_default_version(pcmc->default_cpu_version); - - /* Calculates the limit to CPU APIC ID values - * - * Limit for the APIC ID value, so that all - * CPU APIC IDs are < pcms->apic_id_limit. - * - * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). - */ - pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, - ms->smp.max_cpus - 1) + 1; - possible_cpus = mc->possible_cpu_arch_ids(ms); - for (i = 0; i < ms->smp.cpus; i++) { - pc_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal); - } -} - static void pc_build_feature_control_file(PCMachineState *pcms) { MachineState *ms = MACHINE(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); CPUX86State *env = &cpu->env; uint32_t unused, ecx, edx; @@ -1550,7 +1029,7 @@ static void pc_build_feature_control_file(PCMachineState *pcms) val = g_malloc(sizeof(*val)); *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED); - fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); + fw_cfg_add_file(x86ms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); } static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) @@ -1571,10 +1050,11 @@ void pc_machine_done(Notifier *notifier, void *data) { PCMachineState *pcms = container_of(notifier, PCMachineState, machine_done); + X86MachineState *x86ms = X86_MACHINE(pcms); PCIBus *bus = pcms->bus; /* set the number of CPUs */ - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); if (bus) { int extra_hosts = 0; @@ -1585,23 +1065,23 @@ void pc_machine_done(Notifier *notifier, void *data) extra_hosts++; } } - if (extra_hosts && pcms->fw_cfg) { + if (extra_hosts && x86ms->fw_cfg) { uint64_t *val = g_malloc(sizeof(*val)); *val = cpu_to_le64(extra_hosts); - fw_cfg_add_file(pcms->fw_cfg, + fw_cfg_add_file(x86ms->fw_cfg, "etc/extra-pci-roots", val, sizeof(*val)); } } acpi_setup(); - if (pcms->fw_cfg) { + if 
(x86ms->fw_cfg) { pc_build_smbios(pcms); pc_build_feature_control_file(pcms); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); } - if (pcms->apic_id_limit > 255 && !xen_enabled()) { + if (x86ms->apic_id_limit > 255 && !xen_enabled()) { IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) || @@ -1619,8 +1099,9 @@ void pc_guest_info_init(PCMachineState *pcms) { int i; MachineState *ms = MACHINE(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); - pcms->apic_xrupt_override = kvm_allows_irq0_override(); + x86ms->apic_xrupt_override = kvm_allows_irq0_override(); pcms->numa_nodes = ms->numa_state->num_nodes; pcms->node_mem = g_malloc0(pcms->numa_nodes * sizeof *pcms->node_mem); @@ -1645,14 +1126,17 @@ void xen_load_linux(PCMachineState *pcms) { int i; FWCfgState *fw_cfg; + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); assert(MACHINE(pcms)->kernel_filename != NULL); fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE); - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); rom_set_fw(fw_cfg); - load_linux(pcms, fw_cfg); + load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, + pcmc->linuxboot_dma_enabled, pcmc->pvh_enabled); for (i = 0; i < nb_option_roms; i++) { assert(!strcmp(option_rom[i].name, "linuxboot.bin") || !strcmp(option_rom[i].name, "linuxboot_dma.bin") || @@ -1660,7 +1144,7 @@ void xen_load_linux(PCMachineState *pcms) !strcmp(option_rom[i].name, "multiboot.bin")); rom_add_option(option_rom[i].name, option_rom[i].bootindex); } - pcms->fw_cfg = fw_cfg; + x86ms->fw_cfg = fw_cfg; } void pc_memory_init(PCMachineState *pcms, @@ -1673,10 +1157,11 @@ void pc_memory_init(PCMachineState *pcms, MemoryRegion *ram_below_4g, *ram_above_4g; FWCfgState 
*fw_cfg; MachineState *machine = MACHINE(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - assert(machine->ram_size == pcms->below_4g_mem_size + - pcms->above_4g_mem_size); + assert(machine->ram_size == x86ms->below_4g_mem_size + + x86ms->above_4g_mem_size); linux_boot = (machine->kernel_filename != NULL); @@ -1690,17 +1175,17 @@ void pc_memory_init(PCMachineState *pcms, *ram_memory = ram; ram_below_4g = g_malloc(sizeof(*ram_below_4g)); memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, - 0, pcms->below_4g_mem_size); + 0, x86ms->below_4g_mem_size); memory_region_add_subregion(system_memory, 0, ram_below_4g); - e820_add_entry(0, pcms->below_4g_mem_size, E820_RAM); - if (pcms->above_4g_mem_size > 0) { + e820_add_entry(0, x86ms->below_4g_mem_size, E820_RAM); + if (x86ms->above_4g_mem_size > 0) { ram_above_4g = g_malloc(sizeof(*ram_above_4g)); memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram, - pcms->below_4g_mem_size, - pcms->above_4g_mem_size); + x86ms->below_4g_mem_size, + x86ms->above_4g_mem_size); memory_region_add_subregion(system_memory, 0x100000000ULL, ram_above_4g); - e820_add_entry(0x100000000ULL, pcms->above_4g_mem_size, E820_RAM); + e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM); } if (!pcmc->has_reserved_memory && @@ -1735,7 +1220,7 @@ void pc_memory_init(PCMachineState *pcms, } machine->device_memory->base = - ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1 * GiB); + ROUND_UP(0x100000000ULL + x86ms->above_4g_mem_size, 1 * GiB); if (pcmc->enforce_aligned_dimm) { /* size device region assuming 1G page max alignment per slot */ @@ -1786,16 +1271,17 @@ void pc_memory_init(PCMachineState *pcms, } if (linux_boot) { - load_linux(pcms, fw_cfg); + load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, + pcmc->linuxboot_dma_enabled, pcmc->pvh_enabled); } for (i = 0; i < nb_option_roms; i++) { rom_add_option(option_rom[i].name, option_rom[i].bootindex); } - 
pcms->fw_cfg = fw_cfg; + x86ms->fw_cfg = fw_cfg; /* Init default IOAPIC address space */ - pcms->ioapic_as = &address_space_memory; + x86ms->ioapic_as = &address_space_memory; } /* @@ -1807,6 +1293,7 @@ uint64_t pc_pci_hole64_start(void) PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); MachineState *ms = MACHINE(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); uint64_t hole64_start = 0; if (pcmc->has_reserved_memory && ms->device_memory->base) { @@ -1815,7 +1302,7 @@ uint64_t pc_pci_hole64_start(void) hole64_start += memory_region_size(&ms->device_memory->mr); } } else { - hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; + hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size; } return ROUND_UP(hole64_start, 1 * GiB); @@ -2154,6 +1641,7 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); + X86MachineState *x86ms = X86_MACHINE(pcms); if (pcms->acpi_dev) { hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); @@ -2163,12 +1651,12 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, } /* increment the number of CPUs */ - pcms->boot_cpus++; - if (pcms->rtc) { - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); + x86ms->boot_cpus++; + if (x86ms->rtc) { + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); } - if (pcms->fw_cfg) { - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); + if (x86ms->fw_cfg) { + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); } found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL); @@ -2214,6 +1702,7 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); + X86MachineState *x86ms = X86_MACHINE(pcms); hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); if (local_err) { @@ -2225,10 
+1714,10 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, object_property_set_bool(OBJECT(dev), false, "realized", NULL); /* decrement the number of CPUs */ - pcms->boot_cpus--; + x86ms->boot_cpus--; /* Update the number of CPUs in CMOS */ - rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); - fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); + rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); out: error_propagate(errp, local_err); } @@ -2244,6 +1733,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, CPUX86State *env = &cpu->env; MachineState *ms = MACHINE(hotplug_dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); unsigned int smp_cores = ms->smp.cores; unsigned int smp_threads = ms->smp.threads; @@ -2253,7 +1743,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, return; } - env->nr_dies = pcms->smp_dies; + env->nr_dies = x86ms->smp_dies; /* * If APIC ID is not set, @@ -2261,13 +1751,13 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, */ if (cpu->apic_id == UNASSIGNED_APIC_ID) { int max_socket = (ms->smp.max_cpus - 1) / - smp_threads / smp_cores / pcms->smp_dies; + smp_threads / smp_cores / x86ms->smp_dies; /* * die-id was optional in QEMU 4.0 and older, so keep it optional * if there's only one die per socket. 
*/ - if (cpu->die_id < 0 && pcms->smp_dies == 1) { + if (cpu->die_id < 0 && x86ms->smp_dies == 1) { cpu->die_id = 0; } @@ -2282,9 +1772,9 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, if (cpu->die_id < 0) { error_setg(errp, "CPU die-id is not set"); return; - } else if (cpu->die_id > pcms->smp_dies - 1) { + } else if (cpu->die_id > x86ms->smp_dies - 1) { error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", - cpu->die_id, pcms->smp_dies - 1); + cpu->die_id, x86ms->smp_dies - 1); return; } if (cpu->core_id < 0) { @@ -2308,7 +1798,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, topo.die_id = cpu->die_id; topo.core_id = cpu->core_id; topo.smt_id = cpu->thread_id; - cpu->apic_id = apicid_from_topo_ids(pcms->smp_dies, smp_cores, + cpu->apic_id = apicid_from_topo_ids(x86ms->smp_dies, smp_cores, smp_threads, &topo); } @@ -2316,7 +1806,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, if (!cpu_slot) { MachineState *ms = MACHINE(pcms); - x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, + x86_topo_ids_from_apicid(cpu->apic_id, x86ms->smp_dies, smp_cores, smp_threads, &topo); error_setg(errp, "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" @@ -2338,7 +1828,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() * once -smp refactoring is complete and there will be CPU private * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ - x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, + x86_topo_ids_from_apicid(cpu->apic_id, x86ms->smp_dies, smp_cores, smp_threads, &topo); if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { error_setg(errp, "property socket-id: %u doesn't match set apic-id:" @@ -2520,45 +2010,6 @@ pc_machine_get_device_memory_region_size(Object *obj, Visitor *v, visit_type_int(v, name, &value, errp); } -static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, - const 
char *name, void *opaque, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - uint64_t value = pcms->max_ram_below_4g; - - visit_type_size(v, name, &value, errp); -} - -static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - Error *error = NULL; - uint64_t value; - - visit_type_size(v, name, &value, &error); - if (error) { - error_propagate(errp, error); - return; - } - if (value > 4 * GiB) { - error_setg(&error, - "Machine option 'max-ram-below-4g=%"PRIu64 - "' expects size less than or equal to 4G", value); - error_propagate(errp, error); - return; - } - - if (value < 1 * MiB) { - warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," - "BIOS may not work with less than 1MiB", value); - } - - pcms->max_ram_below_4g = value; -} - static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -2664,7 +2115,6 @@ static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); - pcms->max_ram_below_4g = 0; /* use default */ pcms->smm = ON_OFF_AUTO_AUTO; #ifdef CONFIG_VMPORT pcms->vmport = ON_OFF_AUTO_AUTO; @@ -2676,7 +2126,6 @@ static void pc_machine_initfn(Object *obj) pcms->smbus_enabled = true; pcms->sata_enabled = true; pcms->pit_enabled = true; - pcms->smp_dies = 1; pc_system_flash_create(pcms); } @@ -2707,85 +2156,6 @@ static void pc_machine_wakeup(MachineState *machine) cpu_synchronize_all_post_reset(); } -static CpuInstanceProperties -pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index) -{ - MachineClass *mc = MACHINE_GET_CLASS(ms); - const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); - - assert(cpu_index < possible_cpus->len); - return possible_cpus->cpus[cpu_index].props; -} - -static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx) -{ - X86CPUTopoInfo topo; - PCMachineState *pcms = PC_MACHINE(ms); - - assert(idx < 
ms->possible_cpus->len); - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, - pcms->smp_dies, ms->smp.cores, - ms->smp.threads, &topo); - return topo.pkg_id % ms->numa_state->num_nodes; -} - -static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) -{ - PCMachineState *pcms = PC_MACHINE(ms); - int i; - unsigned int max_cpus = ms->smp.max_cpus; - - if (ms->possible_cpus) { - /* - * make sure that max_cpus hasn't changed since the first use, i.e. - * -smp hasn't been parsed after it - */ - assert(ms->possible_cpus->len == max_cpus); - return ms->possible_cpus; - } - - ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + - sizeof(CPUArchId) * max_cpus); - ms->possible_cpus->len = max_cpus; - for (i = 0; i < ms->possible_cpus->len; i++) { - X86CPUTopoInfo topo; - - ms->possible_cpus->cpus[i].type = ms->cpu_type; - ms->possible_cpus->cpus[i].vcpus_count = 1; - ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); - x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, - pcms->smp_dies, ms->smp.cores, - ms->smp.threads, &topo); - ms->possible_cpus->cpus[i].props.has_socket_id = true; - ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; - if (pcms->smp_dies > 1) { - ms->possible_cpus->cpus[i].props.has_die_id = true; - ms->possible_cpus->cpus[i].props.die_id = topo.die_id; - } - ms->possible_cpus->cpus[i].props.has_core_id = true; - ms->possible_cpus->cpus[i].props.core_id = topo.core_id; - ms->possible_cpus->cpus[i].props.has_thread_id = true; - ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; - } - return ms->possible_cpus; -} - -static void x86_nmi(NMIState *n, int cpu_index, Error **errp) -{ - /* cpu index isn't used */ - CPUState *cs; - - CPU_FOREACH(cs) { - X86CPU *cpu = X86_CPU(cs); - - if (!cpu->apic_state) { - cpu_interrupt(cs, CPU_INTERRUPT_NMI); - } else { - apic_deliver_nmi(cpu->apic_state); - } - } -} - static void pc_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = 
MACHINE_CLASS(oc); @@ -2810,14 +2180,11 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = pc_get_hotplug_handler; - mc->cpu_index_to_instance_props = pc_cpu_index_to_props; - mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; - mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; mc->auto_enable_numa_with_memhp = true; mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; - mc->hot_add_cpu = pc_hot_add_cpu; - mc->smp_parse = pc_smp_parse; + mc->hot_add_cpu = x86_hot_add_cpu; + mc->smp_parse = x86_smp_parse; mc->block_default_type = IF_IDE; mc->max_cpus = 255; mc->reset = pc_machine_reset; @@ -2835,13 +2202,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pc_machine_get_device_memory_region_size, NULL, NULL, NULL, &error_abort); - object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", - pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, - NULL, NULL, &error_abort); - - object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G, - "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); - object_class_property_add(oc, PC_MACHINE_SMM, "OnOffAuto", pc_machine_get_smm, pc_machine_set_smm, NULL, NULL, &error_abort); @@ -2866,7 +2226,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) static const TypeInfo pc_machine_info = { .name = TYPE_PC_MACHINE, - .parent = TYPE_MACHINE, + .parent = TYPE_X86_MACHINE, .abstract = true, .instance_size = sizeof(PCMachineState), .instance_init = pc_machine_initfn, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 2362675149..f63c27bc74 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -27,6 +27,7 @@ #include "qemu/units.h" #include "hw/loader.h" +#include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/i386/apic.h" #include "hw/display/ramfb.h" @@ -73,6 +74,7 @@ static void pc_init1(MachineState *machine, { 
PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); int i; @@ -125,11 +127,11 @@ static void pc_init1(MachineState *machine, if (xen_enabled()) { xen_hvm_init(pcms, &ram_memory); } else { - if (!pcms->max_ram_below_4g) { - pcms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ + if (!x86ms->max_ram_below_4g) { + x86ms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ } - lowmem = pcms->max_ram_below_4g; - if (machine->ram_size >= pcms->max_ram_below_4g) { + lowmem = x86ms->max_ram_below_4g; + if (machine->ram_size >= x86ms->max_ram_below_4g) { if (pcmc->gigabyte_align) { if (lowmem > 0xc0000000) { lowmem = 0xc0000000; @@ -138,21 +140,21 @@ static void pc_init1(MachineState *machine, warn_report("Large machine and max_ram_below_4g " "(%" PRIu64 ") not a multiple of 1G; " "possible bad performance.", - pcms->max_ram_below_4g); + x86ms->max_ram_below_4g); } } } if (machine->ram_size >= lowmem) { - pcms->above_4g_mem_size = machine->ram_size - lowmem; - pcms->below_4g_mem_size = lowmem; + x86ms->above_4g_mem_size = machine->ram_size - lowmem; + x86ms->below_4g_mem_size = lowmem; } else { - pcms->above_4g_mem_size = 0; - pcms->below_4g_mem_size = machine->ram_size; + x86ms->above_4g_mem_size = 0; + x86ms->below_4g_mem_size = machine->ram_size; } } - pc_cpus_init(pcms); + x86_cpus_init(x86ms, pcmc->default_cpu_version); if (kvm_enabled() && pcmc->kvmclock_enabled) { kvmclock_create(); @@ -190,19 +192,19 @@ static void pc_init1(MachineState *machine, gsi_state = g_malloc0(sizeof(*gsi_state)); if (kvm_ioapic_in_kernel()) { kvm_pc_setup_irq_routing(pcmc->pci_enabled); - pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, - GSI_NUM_PINS); + x86ms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, + GSI_NUM_PINS); } else { - pcms->gsi = qemu_allocate_irqs(gsi_handler, 
gsi_state, GSI_NUM_PINS); + x86ms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); } if (pcmc->pci_enabled) { pci_bus = i440fx_init(host_type, pci_type, - &i440fx_state, &piix3_devfn, &isa_bus, pcms->gsi, + &i440fx_state, &piix3_devfn, &isa_bus, x86ms->gsi, system_memory, system_io, machine->ram_size, - pcms->below_4g_mem_size, - pcms->above_4g_mem_size, + x86ms->below_4g_mem_size, + x86ms->above_4g_mem_size, pci_memory, ram_memory); pcms->bus = pci_bus; } else { @@ -212,7 +214,7 @@ static void pc_init1(MachineState *machine, &error_abort); no_hpet = 1; } - isa_bus_irqs(isa_bus, pcms->gsi); + isa_bus_irqs(isa_bus, x86ms->gsi); if (kvm_pic_in_kernel()) { i8259 = kvm_i8259_init(isa_bus); @@ -230,7 +232,7 @@ static void pc_init1(MachineState *machine, ioapic_init_gsi(gsi_state, "i440fx"); } - pc_register_ferr_irq(pcms->gsi[13]); + pc_register_ferr_irq(x86ms->gsi[13]); pc_vga_init(isa_bus, pcmc->pci_enabled ? pci_bus : NULL); @@ -240,7 +242,7 @@ static void pc_init1(MachineState *machine, } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true, + pc_basic_device_init(isa_bus, x86ms->gsi, &rtc_state, true, (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit_enabled, 0x4); @@ -288,7 +290,7 @@ else { smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0); /* TODO: Populate SPD eeprom data. 
*/ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, - pcms->gsi[9], smi_irq, + x86ms->gsi[9], smi_irq, pc_machine_is_smm_enabled(pcms), &piix4_pm); smbus_eeprom_init(smbus, 8, NULL, 0); @@ -304,7 +306,7 @@ else { if (machine->nvdimms_state->is_enabled) { nvdimm_init_acpi_state(machine->nvdimms_state, system_io, - pcms->fw_cfg, OBJECT(pcms)); + x86ms->fw_cfg, OBJECT(pcms)); } } @@ -728,7 +730,7 @@ DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4_fn, static void pc_i440fx_1_3_machine_options(MachineClass *m) { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); static GlobalProperty compat[] = { PC_CPU_MODEL_IDS("1.3.0") { "usb-tablet", "usb_version", "1" }, @@ -739,7 +741,7 @@ static void pc_i440fx_1_3_machine_options(MachineClass *m) pc_i440fx_1_4_machine_options(m); m->hw_version = "1.3.0"; - pcmc->compat_apic_id_mode = true; + x86mc->compat_apic_id_mode = true; compat_props_add(m->compat_props, compat, G_N_ELEMENTS(compat)); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index d4e8a1cb9f..71f71bc61d 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -41,6 +41,7 @@ #include "hw/pci-host/q35.h" #include "hw/qdev-properties.h" #include "exec/address-spaces.h" +#include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/i386/ich9.h" #include "hw/i386/amd_iommu.h" @@ -115,6 +116,7 @@ static void pc_q35_init(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + X86MachineState *x86ms = X86_MACHINE(pcms); Q35PCIHost *q35_host; PCIHostState *phb; PCIBus *host_bus; @@ -152,34 +154,34 @@ static void pc_q35_init(MachineState *machine) /* Handle the machine opt max-ram-below-4g. It is basically doing * min(qemu limit, user limit). 
*/ - if (!pcms->max_ram_below_4g) { - pcms->max_ram_below_4g = 1ULL << 32; /* default: 4G */; + if (!x86ms->max_ram_below_4g) { + x86ms->max_ram_below_4g = 1ULL << 32; /* default: 4G */; } - if (lowmem > pcms->max_ram_below_4g) { - lowmem = pcms->max_ram_below_4g; + if (lowmem > x86ms->max_ram_below_4g) { + lowmem = x86ms->max_ram_below_4g; if (machine->ram_size - lowmem > lowmem && lowmem & (1 * GiB - 1)) { warn_report("There is possibly poor performance as the ram size " " (0x%" PRIx64 ") is more then twice the size of" " max-ram-below-4g (%"PRIu64") and" " max-ram-below-4g is not a multiple of 1G.", - (uint64_t)machine->ram_size, pcms->max_ram_below_4g); + (uint64_t)machine->ram_size, x86ms->max_ram_below_4g); } } if (machine->ram_size >= lowmem) { - pcms->above_4g_mem_size = machine->ram_size - lowmem; - pcms->below_4g_mem_size = lowmem; + x86ms->above_4g_mem_size = machine->ram_size - lowmem; + x86ms->below_4g_mem_size = lowmem; } else { - pcms->above_4g_mem_size = 0; - pcms->below_4g_mem_size = machine->ram_size; + x86ms->above_4g_mem_size = 0; + x86ms->below_4g_mem_size = machine->ram_size; } if (xen_enabled()) { xen_hvm_init(pcms, &ram_memory); } - pc_cpus_init(pcms); + x86_cpus_init(x86ms, pcmc->default_cpu_version); kvmclock_create(); @@ -213,10 +215,10 @@ static void pc_q35_init(MachineState *machine) gsi_state = g_malloc0(sizeof(*gsi_state)); if (kvm_ioapic_in_kernel()) { kvm_pc_setup_irq_routing(pcmc->pci_enabled); - pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, + x86ms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, GSI_NUM_PINS); } else { - pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); + x86ms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); } /* create pci host bus */ @@ -231,9 +233,9 @@ static void pc_q35_init(MachineState *machine) MCH_HOST_PROP_SYSTEM_MEM, NULL); object_property_set_link(OBJECT(q35_host), OBJECT(system_io), MCH_HOST_PROP_IO_MEM, NULL); - 
object_property_set_int(OBJECT(q35_host), pcms->below_4g_mem_size, + object_property_set_int(OBJECT(q35_host), x86ms->below_4g_mem_size, PCI_HOST_BELOW_4G_MEM_SIZE, NULL); - object_property_set_int(OBJECT(q35_host), pcms->above_4g_mem_size, + object_property_set_int(OBJECT(q35_host), x86ms->above_4g_mem_size, PCI_HOST_ABOVE_4G_MEM_SIZE, NULL); /* pci */ qdev_init_nofail(DEVICE(q35_host)); @@ -255,7 +257,7 @@ static void pc_q35_init(MachineState *machine) ich9_lpc = ICH9_LPC_DEVICE(lpc); lpc_dev = DEVICE(lpc); for (i = 0; i < GSI_NUM_PINS; i++) { - qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, pcms->gsi[i]); + qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]); } pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc, ICH9_LPC_NB_PIRQS); @@ -279,7 +281,7 @@ static void pc_q35_init(MachineState *machine) ioapic_init_gsi(gsi_state, "q35"); } - pc_register_ferr_irq(pcms->gsi[13]); + pc_register_ferr_irq(x86ms->gsi[13]); assert(pcms->vmport != ON_OFF_AUTO__MAX); if (pcms->vmport == ON_OFF_AUTO_AUTO) { @@ -287,7 +289,7 @@ static void pc_q35_init(MachineState *machine) } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy, + pc_basic_device_init(isa_bus, x86ms->gsi, &rtc_state, !mc->no_floppy, (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit_enabled, 0xff0104); @@ -330,7 +332,7 @@ static void pc_q35_init(MachineState *machine) if (machine->nvdimms_state->is_enabled) { nvdimm_init_acpi_state(machine->nvdimms_state, system_io, - pcms->fw_cfg, OBJECT(pcms)); + x86ms->fw_cfg, OBJECT(pcms)); } } diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index a9983f0bfb..97f38e0423 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -31,6 +31,7 @@ #include "qemu/option.h" #include "qemu/units.h" #include "hw/sysbus.h" +#include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/loader.h" #include "hw/qdev-properties.h" @@ -38,8 +39,6 @@ #include "hw/block/flash.h" #include "sysemu/kvm.h" 
-#define BIOS_FILENAME "bios.bin" - /* * We don't have a theoretically justifiable exact lower bound on the base * address of any flash mapping. In practice, the IO-APIC MMIO range is @@ -211,59 +210,6 @@ static void pc_system_flash_map(PCMachineState *pcms, } } -static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) -{ - char *filename; - MemoryRegion *bios, *isa_bios; - int bios_size, isa_bios_size; - int ret; - - /* BIOS load */ - if (bios_name == NULL) { - bios_name = BIOS_FILENAME; - } - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); - if (filename) { - bios_size = get_image_size(filename); - } else { - bios_size = -1; - } - if (bios_size <= 0 || - (bios_size % 65536) != 0) { - goto bios_error; - } - bios = g_malloc(sizeof(*bios)); - memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); - if (!isapc_ram_fw) { - memory_region_set_readonly(bios, true); - } - ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); - if (ret != 0) { - bios_error: - fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); - exit(1); - } - g_free(filename); - - /* map the last 128KB of the BIOS in ISA space */ - isa_bios_size = MIN(bios_size, 128 * KiB); - isa_bios = g_malloc(sizeof(*isa_bios)); - memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, - bios_size - isa_bios_size, isa_bios_size); - memory_region_add_subregion_overlap(rom_memory, - 0x100000 - isa_bios_size, - isa_bios, - 1); - if (!isapc_ram_fw) { - memory_region_set_readonly(isa_bios, true); - } - - /* map all the bios at the top of memory */ - memory_region_add_subregion(rom_memory, - (uint32_t)(-bios_size), - bios); -} - void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory) { @@ -272,7 +218,7 @@ void pc_system_firmware_init(PCMachineState *pcms, BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; if (!pcmc->pci_enabled) { - old_pc_system_rom_init(rom_memory, true); + x86_system_rom_init(rom_memory, true); return; } 
@@ -293,7 +239,7 @@ void pc_system_firmware_init(PCMachineState *pcms, if (!pflash_blk[0]) { /* Machine property pflash0 not set, use ROM mode */ - old_pc_system_rom_init(rom_memory, false); + x86_system_rom_init(rom_memory, false); } else { if (kvm_enabled() && !kvm_readonly_mem_enabled()) { /* diff --git a/hw/i386/x86.c b/hw/i386/x86.c new file mode 100644 index 0000000000..4de9dd100f --- /dev/null +++ b/hw/i386/x86.c @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2019 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/cutils.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qapi-visit-common.h" +#include "qapi/visitor.h" +#include "sysemu/qtest.h" +#include "sysemu/numa.h" +#include "sysemu/replay.h" +#include "sysemu/sysemu.h" + +#include "hw/i386/x86.h" +#include "target/i386/cpu.h" +#include "hw/i386/topology.h" +#include "hw/i386/fw_cfg.h" +#include "hw/acpi/cpu_hotplug.h" +#include "hw/nmi.h" +#include "hw/loader.h" +#include "multiboot.h" +#include "pvh.h" +#include "standard-headers/asm-x86/bootparam.h" + +#define BIOS_FILENAME "bios.bin" + +/* Calculates initial APIC ID for a specific CPU index + * + * Currently we need to be able to calculate the APIC ID from the CPU index + * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have + * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of + * all CPUs up to max_cpus. 
+ */ +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + unsigned int cpu_index) +{ + MachineState *ms = MACHINE(x86ms); + X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms); + uint32_t correct_id; + static bool warned; + + correct_id = x86_apicid_from_cpu_idx(x86ms->smp_dies, ms->smp.cores, + ms->smp.threads, cpu_index); + if (x86mc->compat_apic_id_mode) { + if (cpu_index != correct_id && !warned && !qtest_enabled()) { + error_report("APIC IDs set in compatibility mode, " + "CPU topology won't match the configuration"); + warned = true; + } + return cpu_index; + } else { + return correct_id; + } +} + + +static void x86_new_cpu(X86MachineState *x86ms, int64_t apic_id, Error **errp) +{ + Object *cpu = NULL; + Error *local_err = NULL; + CPUX86State *env = NULL; + + cpu = object_new(MACHINE(x86ms)->cpu_type); + + env = &X86_CPU(cpu)->env; + env->nr_dies = x86ms->smp_dies; + + object_property_set_uint(cpu, apic_id, "apic-id", &local_err); + object_property_set_bool(cpu, true, "realized", &local_err); + + object_unref(cpu); + error_propagate(errp, local_err); +} + +/* + * This function is very similar to smp_parse() + * in hw/core/machine.c but includes CPU die support. + */ +void x86_smp_parse(MachineState *ms, QemuOpts *opts) +{ + X86MachineState *x86ms = X86_MACHINE(ms); + + if (opts) { + unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); + unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); + unsigned dies = qemu_opt_get_number(opts, "dies", 1); + unsigned cores = qemu_opt_get_number(opts, "cores", 0); + unsigned threads = qemu_opt_get_number(opts, "threads", 0); + + /* compute missing values, prefer sockets over cores over threads */ + if (cpus == 0 || sockets == 0) { + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + if (cpus == 0) { + sockets = sockets > 0 ? 
sockets : 1; + cpus = cores * threads * dies * sockets; + } else { + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + sockets = ms->smp.max_cpus / (cores * threads * dies); + } + } else if (cores == 0) { + threads = threads > 0 ? threads : 1; + cores = cpus / (sockets * dies * threads); + cores = cores > 0 ? cores : 1; + } else if (threads == 0) { + threads = cpus / (cores * dies * sockets); + threads = threads > 0 ? threads : 1; + } else if (sockets * dies * cores * threads < cpus) { + error_report("cpu topology: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " + "smp_cpus (%u)", + sockets, dies, cores, threads, cpus); + exit(1); + } + + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + + if (ms->smp.max_cpus < cpus) { + error_report("maxcpus must be equal to or greater than smp"); + exit(1); + } + + if (sockets * dies * cores * threads > ms->smp.max_cpus) { + error_report("cpu topology: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " + "maxcpus (%u)", + sockets, dies, cores, threads, + ms->smp.max_cpus); + exit(1); + } + + if (sockets * dies * cores * threads != ms->smp.max_cpus) { + warn_report("Invalid CPU topology deprecated: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " + "!= maxcpus (%u)", + sockets, dies, cores, threads, + ms->smp.max_cpus); + } + + ms->smp.cpus = cpus; + ms->smp.cores = cores; + ms->smp.threads = threads; + x86ms->smp_dies = dies; + } + + if (ms->smp.cpus > 1) { + Error *blocker = NULL; + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); + replay_add_blocker(blocker); + } +} + +void x86_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(ms); + int64_t apic_id = x86_cpu_apic_id_from_index(x86ms, id); + Error *local_err = NULL; + + if (id < 0) { + error_setg(errp, "Invalid CPU id: %" PRIi64, id); + return; + } + + if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { + error_setg(errp, "Unable to add CPU: %" PRIi64 + 
", resulting APIC ID (%" PRIi64 ") is too large", + id, apic_id); + return; + } + + x86_new_cpu(X86_MACHINE(ms), apic_id, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) +{ + int i; + const CPUArchIdList *possible_cpus; + MachineState *ms = MACHINE(x86ms); + MachineClass *mc = MACHINE_GET_CLASS(x86ms); + + x86_cpu_set_default_version(default_cpu_version); + + /* Calculates the limit to CPU APIC ID values + * + * Limit for the APIC ID value, so that all + * CPU APIC IDs are < x86ms->apic_id_limit. + * + * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). + */ + x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, + ms->smp.max_cpus - 1) + 1; + possible_cpus = mc->possible_cpu_arch_ids(ms); + for (i = 0; i < ms->smp.cpus; i++) { + x86_new_cpu(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); + } +} + +void x86_nmi(NMIState *n, int cpu_index, Error **errp) +{ + /* cpu index isn't used */ + CPUState *cs; + + CPU_FOREACH(cs) { + X86CPU *cpu = X86_CPU(cs); + + if (!cpu->apic_state) { + cpu_interrupt(cs, CPU_INTERRUPT_NMI); + } else { + apic_deliver_nmi(cpu->apic_state); + } + } +} + +CpuInstanceProperties +x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + + assert(cpu_index < possible_cpus->len); + return possible_cpus->cpus[cpu_index].props; +} + +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) +{ + X86CPUTopoInfo topo; + X86MachineState *x86ms = X86_MACHINE(ms); + + assert(idx < ms->possible_cpus->len); + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, + x86ms->smp_dies, ms->smp.cores, + ms->smp.threads, &topo); + return topo.pkg_id % ms->numa_state->num_nodes; +} + +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) +{ + X86MachineState *x86ms = 
X86_MACHINE(ms); + int i; + unsigned int max_cpus = ms->smp.max_cpus; + + if (ms->possible_cpus) { + /* + * make sure that max_cpus hasn't changed since the first use, i.e. + * -smp hasn't been parsed after it + */ + assert(ms->possible_cpus->len == max_cpus); + return ms->possible_cpus; + } + + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + + sizeof(CPUArchId) * max_cpus); + ms->possible_cpus->len = max_cpus; + for (i = 0; i < ms->possible_cpus->len; i++) { + X86CPUTopoInfo topo; + + ms->possible_cpus->cpus[i].type = ms->cpu_type; + ms->possible_cpus->cpus[i].vcpus_count = 1; + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(x86ms, i); + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, + x86ms->smp_dies, ms->smp.cores, + ms->smp.threads, &topo); + ms->possible_cpus->cpus[i].props.has_socket_id = true; + ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; + if (x86ms->smp_dies > 1) { + ms->possible_cpus->cpus[i].props.has_die_id = true; + ms->possible_cpus->cpus[i].props.die_id = topo.die_id; + } + ms->possible_cpus->cpus[i].props.has_core_id = true; + ms->possible_cpus->cpus[i].props.core_id = topo.core_id; + ms->possible_cpus->cpus[i].props.has_thread_id = true; + ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; + } + return ms->possible_cpus; +} + +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) +{ + char *filename; + MemoryRegion *bios, *isa_bios; + int bios_size, isa_bios_size; + int ret; + + /* BIOS load */ + if (bios_name == NULL) { + bios_name = BIOS_FILENAME; + } + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); + } else { + bios_size = -1; + } + if (bios_size <= 0 || + (bios_size % 65536) != 0) { + goto bios_error; + } + bios = g_malloc(sizeof(*bios)); + memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); + if (!isapc_ram_fw) { + memory_region_set_readonly(bios, true); + } + ret = 
rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + bios_error: + fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); + exit(1); + } + g_free(filename); + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); + isa_bios = g_malloc(sizeof(*isa_bios)); + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, + isa_bios, + 1); + if (!isapc_ram_fw) { + memory_region_set_readonly(isa_bios, true); + } + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), + bios); +} + +static long get_file_size(FILE *f) +{ + long where, size; + + /* XXX: on Unix systems, using fstat() probably makes more sense */ + + where = ftell(f); + fseek(f, 0, SEEK_END); + size = ftell(f); + fseek(f, where, SEEK_SET); + + return size; +} + +struct setup_data { + uint64_t next; + uint32_t type; + uint32_t len; + uint8_t data[0]; +} __attribute__((packed)); + +void load_linux(X86MachineState *x86ms, + FWCfgState *fw_cfg, + unsigned acpi_data_size, + bool linuxboot_dma_enabled, + bool pvh_enabled) +{ + uint16_t protocol; + int setup_size, kernel_size, cmdline_size; + int dtb_size, setup_data_offset; + uint32_t initrd_max; + uint8_t header[8192], *setup, *kernel; + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; + FILE *f; + char *vmode; + MachineState *machine = MACHINE(x86ms); + struct setup_data *setup_data; + const char *kernel_filename = machine->kernel_filename; + const char *initrd_filename = machine->initrd_filename; + const char *dtb_filename = machine->dtb; + const char *kernel_cmdline = machine->kernel_cmdline; + + /* Align to 16 bytes as a paranoia measure */ + cmdline_size = (strlen(kernel_cmdline)+16) & ~15; + + /* load the kernel header */ + f = fopen(kernel_filename, "rb"); + if (!f || !(kernel_size = 
get_file_size(f)) || + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != + MIN(ARRAY_SIZE(header), kernel_size)) { + fprintf(stderr, "qemu: could not load kernel '%s': %s\n", + kernel_filename, strerror(errno)); + exit(1); + } + + /* kernel protocol version */ +#if 0 + fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); +#endif + if (ldl_p(header+0x202) == 0x53726448) { + protocol = lduw_p(header+0x206); + } else { + size_t pvh_start_addr; + uint32_t mh_load_addr = 0; + uint32_t elf_kernel_size = 0; + /* + * This could be a multiboot kernel. If it is, let's stop treating it + * like a Linux kernel. + * Note: some multiboot images could be in the ELF format (the same of + * PVH), so we try multiboot first since we check the multiboot magic + * header before to load it. + */ + if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, + kernel_cmdline, kernel_size, header)) { + return; + } + /* + * Check if the file is an uncompressed kernel file (ELF) and load it, + * saving the PVH entry point used by the x86/HVM direct boot ABI. + * If load_elfboot() is successful, populate the fw_cfg info. 
+ */ + if (pvh_enabled && + pvh_load_elfboot(kernel_filename, + &mh_load_addr, &elf_kernel_size)) { + fclose(f); + + pvh_start_addr = pvh_get_start_addr(); + + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, + strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, + header, sizeof(header)); + + /* load initrd */ + if (initrd_filename) { + GMappedFile *mapped_file; + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); + if (!mapped_file) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + x86ms->initrd_mapped_file = mapped_file; + + initrd_data = g_mapped_file_get_contents(mapped_file); + initrd_size = g_mapped_file_get_length(mapped_file); + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." 
+ "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max - initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, + initrd_size); + } + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "pvh.bin"; + nb_option_roms++; + + return; + } + protocol = 0; + } + + if (protocol < 0x200 || !(header[0x211] & 0x01)) { + /* Low kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x10000; + } else if (protocol < 0x202) { + /* High but ancient kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x100000; + } else { + /* High and recent kernel */ + real_addr = 0x10000; + cmdline_addr = 0x20000; + prot_addr = 0x100000; + } + +#if 0 + fprintf(stderr, + "qemu: real_addr = 0x" TARGET_FMT_plx "\n" + "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" + "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", + real_addr, + cmdline_addr, + prot_addr); +#endif + + /* highest address for loading the initrd */ + if (protocol >= 0x20c && + lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { + /* + * Linux has supported initrd up to 4 GB for a very long time (2007, + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), + * though it only sets initrd_max to 2 GB to "work around bootloader + * bugs". Luckily, QEMU firmware(which does something like bootloader) + * has supported this. + * + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can + * be loaded into any address. + * + * In addition, initrd_max is uint32_t simply because QEMU doesn't + * support the 64-bit boot protocol (specifically the ext_ramdisk_image + * field). + * + * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
+ */ + initrd_max = UINT32_MAX; + } else if (protocol >= 0x203) { + initrd_max = ldl_p(header+0x22c); + } else { + initrd_max = 0x37ffffff; + } + + if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; + } + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + + if (protocol >= 0x202) { + stl_p(header+0x228, cmdline_addr); + } else { + stw_p(header+0x20, 0xA33F); + stw_p(header+0x22, cmdline_addr-real_addr); + } + + /* handle vga= parameter */ + vmode = strstr(kernel_cmdline, "vga="); + if (vmode) { + unsigned int video_mode; + /* skip "vga=" */ + vmode += 4; + if (!strncmp(vmode, "normal", 6)) { + video_mode = 0xffff; + } else if (!strncmp(vmode, "ext", 3)) { + video_mode = 0xfffe; + } else if (!strncmp(vmode, "ask", 3)) { + video_mode = 0xfffd; + } else { + video_mode = strtol(vmode, NULL, 0); + } + stw_p(header+0x1fa, video_mode); + } + + /* loader type */ + /* High nybble = B reserved for QEMU; low nybble is revision number. + If this code is substantially changed, you may want to consider + incrementing the revision. 
*/ + if (protocol >= 0x200) { + header[0x210] = 0xB0; + } + /* heap */ + if (protocol >= 0x201) { + header[0x211] |= 0x80; /* CAN_USE_HEAP */ + stw_p(header+0x224, cmdline_addr-real_addr-0x200); + } + + /* load initrd */ + if (initrd_filename) { + GMappedFile *mapped_file; + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + if (protocol < 0x200) { + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); + exit(1); + } + + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); + if (!mapped_file) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + x86ms->initrd_mapped_file = mapped_file; + + initrd_data = g_mapped_file_get_contents(mapped_file); + initrd_size = g_mapped_file_get_length(mapped_file); + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." + "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max-initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); + + stl_p(header+0x218, initrd_addr); + stl_p(header+0x21c, initrd_size); + } + + /* load kernel and setup */ + setup_size = header[0x1f1]; + if (setup_size == 0) { + setup_size = 4; + } + setup_size = (setup_size+1)*512; + if (setup_size > kernel_size) { + fprintf(stderr, "qemu: invalid kernel header\n"); + exit(1); + } + kernel_size -= setup_size; + + setup = g_malloc(setup_size); + kernel = g_malloc(kernel_size); + fseek(f, 0, SEEK_SET); + if (fread(setup, 1, setup_size, f) != setup_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + if (fread(kernel, 1, kernel_size, f) != kernel_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + fclose(f); + + /* append dtb to kernel */ + if (dtb_filename) { + if (protocol < 0x209) { 
+ fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); + exit(1); + } + + dtb_size = get_image_size(dtb_filename); + if (dtb_size <= 0) { + fprintf(stderr, "qemu: error reading dtb %s: %s\n", + dtb_filename, strerror(errno)); + exit(1); + } + + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; + kernel = g_realloc(kernel, kernel_size); + + stq_p(header+0x250, prot_addr + setup_data_offset); + + setup_data = (struct setup_data *)(kernel + setup_data_offset); + setup_data->next = 0; + setup_data->type = cpu_to_le32(SETUP_DTB); + setup_data->len = cpu_to_le32(dtb_size); + + load_image_size(dtb_filename, setup_data->data, dtb_size); + } + + memcpy(setup, header, MIN(sizeof(header), setup_size)); + + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "linuxboot.bin"; + if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { + option_rom[nb_option_roms].name = "linuxboot_dma.bin"; + } + nb_option_roms++; +} + +static void x86_machine_get_max_ram_below_4g(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + uint64_t value = x86ms->max_ram_below_4g; + + visit_type_size(v, name, &value, errp); +} + +static void x86_machine_set_max_ram_below_4g(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + Error *error = NULL; + uint64_t value; + + visit_type_size(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + if (value > 4 * 
GiB) { + error_setg(&error, + "Machine option 'max-ram-below-4g=%"PRIu64 + "' expects size less than or equal to 4G", value); + error_propagate(errp, error); + return; + } + + if (value < 1 * MiB) { + warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," + "BIOS may not work with less than 1MiB", value); + } + + x86ms->max_ram_below_4g = value; +} + +static void x86_machine_initfn(Object *obj) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + + x86ms->max_ram_below_4g = 0; /* use default */ + x86ms->smp_dies = 1; +} + +static void x86_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; + mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; + mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + + object_class_property_add(oc, X86_MACHINE_MAX_RAM_BELOW_4G, "size", + x86_machine_get_max_ram_below_4g, x86_machine_set_max_ram_below_4g, + NULL, NULL, &error_abort); + + object_class_property_set_description(oc, X86_MACHINE_MAX_RAM_BELOW_4G, + "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); +} + +static const TypeInfo x86_machine_info = { + .name = TYPE_X86_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(X86MachineState), + .instance_init = x86_machine_initfn, + .class_size = sizeof(X86MachineClass), + .class_init = x86_machine_class_init, +}; + +static void x86_machine_register_types(void) +{ + type_register_static(&x86_machine_info); +} + +type_init(x86_machine_register_types) diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 1ede055387..e621dde6c3 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -23,6 +23,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "monitor/monitor.h" +#include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/i386/apic.h" #include "hw/i386/ioapic.h" @@ -89,7 +90,7 @@ static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info 
*info) static void ioapic_service(IOAPICCommonState *s) { - AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as; + AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as; struct ioapic_entry_info info; uint8_t i; uint32_t mask; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 062feeb69e..de28d55e5c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -3,6 +3,7 @@ #include "exec/memory.h" #include "hw/boards.h" +#include "hw/i386/x86.h" #include "hw/isa/isa.h" #include "hw/block/fdc.h" #include "hw/block/flash.h" @@ -27,7 +28,7 @@ */ struct PCMachineState { /*< private >*/ - MachineState parent_obj; + X86MachineState parent_obj; /* <public> */ @@ -36,15 +37,10 @@ struct PCMachineState { /* Pointers to devices and objects: */ HotplugHandler *acpi_dev; - ISADevice *rtc; PCIBus *bus; - FWCfgState *fw_cfg; - qemu_irq *gsi; PFlashCFI01 *flash[2]; - GMappedFile *initrd_mapped_file; /* Configuration options: */ - uint64_t max_ram_below_4g; OnOffAuto vmport; OnOffAuto smm; @@ -53,27 +49,13 @@ struct PCMachineState { bool sata_enabled; bool pit_enabled; - /* RAM information (sizes, addresses, configuration): */ - ram_addr_t below_4g_mem_size, above_4g_mem_size; - - /* CPU and apic information: */ - bool apic_xrupt_override; - unsigned apic_id_limit; - uint16_t boot_cpus; - unsigned smp_dies; - /* NUMA information: */ uint64_t numa_nodes; uint64_t *node_mem; - - /* Address space used by IOAPIC device. All IOAPIC interrupts - * will be translated to MSI messages in the address space. 
*/ - AddressSpace *ioapic_as; }; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" #define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size" -#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMM "smm" #define PC_MACHINE_SMBUS "smbus" @@ -139,9 +121,6 @@ typedef struct PCMachineClass { /* use PVH to load kernels that support this feature */ bool pvh_enabled; - - /* Enables contiguous-apic-ID mode */ - bool compat_apic_id_mode; } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" @@ -193,10 +172,6 @@ bool pc_machine_is_smm_enabled(PCMachineState *pcms); void pc_register_ferr_irq(qemu_irq irq); void pc_acpi_smi_interrupt(void *opaque, int irq, int level); -void pc_cpus_init(PCMachineState *pcms); -void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp); -void pc_smp_parse(MachineState *ms, QemuOpts *opts); - void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h new file mode 100644 index 0000000000..5980090b29 --- /dev/null +++ b/include/hw/i386/x86.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef HW_I386_X86_H +#define HW_I386_X86_H + +#include "qemu-common.h" +#include "exec/hwaddr.h" +#include "qemu/notify.h" + +#include "hw/boards.h" +#include "hw/nmi.h" + +typedef struct { + /*< private >*/ + MachineClass parent; + + /*< public >*/ + + /* Enables contiguous-apic-ID mode */ + bool compat_apic_id_mode; +} X86MachineClass; + +typedef struct { + /*< private >*/ + MachineState parent; + + /*< public >*/ + + /* Pointers to devices and objects: */ + ISADevice *rtc; + FWCfgState *fw_cfg; + qemu_irq *gsi; + GMappedFile *initrd_mapped_file; + + /* Configuration options: */ + uint64_t max_ram_below_4g; + + /* RAM information (sizes, addresses, configuration): */ + ram_addr_t below_4g_mem_size, above_4g_mem_size; + + /* CPU and apic information: */ + bool apic_xrupt_override; + unsigned apic_id_limit; + uint16_t boot_cpus; + unsigned smp_dies; + + /* Address space used by IOAPIC device. All IOAPIC interrupts + * will be translated to MSI messages in the address space. */ + AddressSpace *ioapic_as; +} X86MachineState; + +#define X86_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" + +#define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") +#define X86_MACHINE(obj) \ + OBJECT_CHECK(X86MachineState, (obj), TYPE_X86_MACHINE) +#define X86_MACHINE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(X86MachineClass, obj, TYPE_X86_MACHINE) +#define X86_MACHINE_CLASS(class) \ + OBJECT_CLASS_CHECK(X86MachineClass, class, TYPE_X86_MACHINE) + +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + unsigned int cpu_index); + +void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +void x86_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp); +void x86_smp_parse(MachineState *ms, QemuOpts *opts); +void x86_nmi(NMIState *n, int cpu_index, Error **errp); + +CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, + unsigned cpu_index); +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); +const CPUArchIdList 
*x86_possible_cpu_arch_ids(MachineState *ms); + +void x86_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw); + +void load_linux(X86MachineState *x86ms, + FWCfgState *fw_cfg, + unsigned acpi_data_size, + bool linuxboot_dma_enabled, + bool pvh_enabled); + +#endif
Split the generic x86 parts of PCMachineState and PCMachineClass out into new X86MachineState and X86MachineClass types, and make the PC machine types derive from them. This allows sharing code with non-PC machine types. Also, move shared functions from pc.c to x86.c. Signed-off-by: Sergio Lopez <slp@redhat.com> --- hw/acpi/cpu_hotplug.c | 10 +- hw/i386/Makefile.objs | 1 + hw/i386/acpi-build.c | 31 +- hw/i386/amd_iommu.c | 4 +- hw/i386/intel_iommu.c | 4 +- hw/i386/pc.c | 796 +++++------------------------------------- hw/i386/pc_piix.c | 48 +-- hw/i386/pc_q35.c | 38 +- hw/i386/pc_sysfw.c | 60 +--- hw/i386/x86.c | 788 +++++++++++++++++++++++++++++++++++++++++ hw/intc/ioapic.c | 3 +- include/hw/i386/pc.h | 29 +- include/hw/i386/x86.h | 97 +++++ 13 files changed, 1045 insertions(+), 864 deletions(-) create mode 100644 hw/i386/x86.c create mode 100644 include/hw/i386/x86.h