diff mbox

x86/PCI: Intel Moorestown platform "PCI" support

Message ID 20090611142311.1c6b3c7b@jbarnes-g45 (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Jesse Barnes June 11, 2009, 9:23 p.m. UTC
The Moorestown platform only has a few devices that actually support
PCI config cycles.  The rest of the devices use an in-RAM MCFG space
for the purposes of device enumeration and initialization.

There are a few uglies in the fake support, like BAR sizes that aren't
a power of two, sizing detection, and writes to the real devices, but
other than that it's pretty straightforward.

Another way to think of this is not really as PCI at all, but just a
table in RAM describing which devices are present, their capabilities
and their offsets in MMIO space.  This could have been done with a
special new firmware table on this platform, but given that we do have
some real PCI devices too, simply describing things in an MCFG type
space was pretty simple.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Jacob Pan <jacob.jun.pan@intel.com>

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Matthew Wilcox June 11, 2009, 9:52 p.m. UTC | #1
On Thu, Jun 11, 2009 at 02:23:11PM -0700, Jesse Barnes wrote:
> +++ b/arch/x86/pci/Makefile
> @@ -13,5 +13,5 @@ obj-$(CONFIG_X86_VISWS)		+= visws.o
>  
>  obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
>  
> -obj-y				+= common.o early.o
> +obj-y				+= common.o early.o mrst.o

We don't want a CONFIG_PCI_MOORESTOWN, like we have for CONFIG_PCI_OLPC?

> +++ b/arch/x86/pci/init.c
> @@ -14,6 +14,10 @@ static __init int pci_arch_init(void)
>  
>  	if (!(pci_probe & PCI_PROBE_NOEARLY))
>  		pci_mmcfg_early_init();
> +#ifdef CONFIG_PCI_MMCONFIG
> +	pci_mmcfg_late_init();
> +	pci_mrst_init();
> +#endif

We should have empty stubs for these functions for the !MMCONFIG case.

> +++ b/arch/x86/pci/irq.c
>  	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
> +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI)
> +	/* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to
> +	 * IOAPIC RTE entries, so we just enable RTE for the device.
> +	 */

The comment is a little garbled.  Is the assumption here that all SFI
platforms have an IOAPIC?

> +	if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) &&
> +		!platform_has(X86_PLATFORM_FEATURE_8259) &&
> +		!platform_has(X86_PLATFORM_FEATURE_ACPI) &&
> +		!platform_has(X86_PLATFORM_FEATURE_BIOS) &&
> +		pin) {
> +		ioapic = mp_sfi_find_ioapic(dev->irq);
> +		io_apic_set_pci_routing(ioapic,
> +			dev->irq, /* IOAPIC pin */
> +			dev->irq, /* IRQ line */
> +			1, /* level trigger */
> +			1); /* polarity active low */
> +	}
> +#endif
>  	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
>  		char *msg = "";
>  
> diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
> index 8766b0e..4ff971a 100644
> --- a/arch/x86/pci/mmconfig-shared.c
> +++ b/arch/x86/pci/mmconfig-shared.c
> @@ -17,6 +17,7 @@
>  #include <linux/sort.h>
>  #include <asm/e820.h>
>  #include <asm/pci_x86.h>
> +#include <linux/sfi.h>
>  
>  /* aperture is up to 256MB but BIOS may reserve less */
>  #define MMCONFIG_APER_MIN	(2 * 1024*1024)
> @@ -186,7 +187,12 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void)
>  	/*
>  	 * do check if amd fam10h already took over
>  	 */
> -	if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
> +#ifdef CONFIG_ACPI
> +
> +	if (!acpi_disabled)
> +		return NULL;
> +#endif
> +	if (pci_mmcfg_config_num || mcp55_checked)
>  		return NULL;

acpi_disabled definitely should be 0 if CONFIG_ACPI isn't set.

>  
> -	if (!known_bridge)
> +	if (!known_bridge) {
> +#ifdef CONFIG_SFI
> +		sfi_table_parse(SFI_SIG_MCFG, sfi_parse_mcfg);
> +#else
>  		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
> +#endif
> +	}

SFI and ACPI are mutually exclusive?

> +#ifndef CONFIG_SFI
>  	pci_mmcfg_reject_broken(early);
> +#endif
>  
>  	if ((pci_mmcfg_config_num == 0) ||
>  	    (pci_mmcfg_config == NULL) ||
> diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
> index 8b2d561..ee9373a 100644
> --- a/arch/x86/pci/mmconfig_32.c
> +++ b/arch/x86/pci/mmconfig_32.c
> @@ -12,6 +12,7 @@
>  #include <linux/pci.h>
>  #include <linux/init.h>
>  #include <linux/acpi.h>
> +#include <linux/sfi.h>
>  #include <asm/e820.h>
>  #include <asm/pci_x86.h>
>  

That's the only change to this file?  Looks stray.

> diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
> new file mode 100644
> index 0000000..1d69381
> --- /dev/null
> +++ b/arch/x86/pci/mrst.c
> @@ -0,0 +1,236 @@
> +/*
> + * Moorestown PCI support
> + *   Copyright (c) 2008 Intel Corporation
> + *     Jesse Barnes <jesse.barnes@intel.com>
> + *
> + * Moorestown has an interesting PCI implementation:
> + *   - configuration space is memory mapped (as defined by MCFG)
> + *   - Lincroft devices also have a real, type 1 configuration space
> + *   - Early Lincroft silicon has a type 1 access bug that will cause
> + *     a hang if non-existent devices are accessed
> + *   - some devices have the "fixed BAR" capability, which means
> + *     they can't be relocated or modified; check for that during
> + *     BAR sizing
> + *
> + * So, we use the MCFG space for all reads and writes, but also send
> + * Lincroft writes to type 1 space.  But only read/write if the device
> + * actually exists, otherwise return all 1s for reads and bit bucket
> + * the writes.
> + *
> + * Here we assume that type 1 and MCFG based access has already been set up,
> + * such that raw_pci_ops is type 1 and raw_pci_ext_ops is MCFG.

How about using pci_direct_conf1 explicitly?  We can make pci_mmcfg
non-static too.

> + */
> +
> +#include <linux/sched.h>
> +#include <linux/pci.h>
> +#include <linux/ioport.h>
> +#include <linux/init.h>
> +#include <linux/dmi.h>
> +
> +#include <asm/acpi.h>
> +#include <asm/segment.h>
> +#include <asm/io.h>
> +#include <asm/smp.h>
> +#include <asm/pci_x86.h>
> +/*
> + * Yuck, we really need to add PCIe capability functions to the core
> + */
> +#define PCIE_CAP_OFFSET	0x100

We've used PCI_CFG_SPACE_SIZE for this value before now ...

> +/* Fixed BAR fields */
> +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00	/* Fixed BAR (TBD) */
> +#define PCI_FIXED_BAR_0_SIZE	0x04
> +#define PCI_FIXED_BAR_1_SIZE	0x08
> +#define PCI_FIXED_BAR_2_SIZE	0x0c
> +#define PCI_FIXED_BAR_3_SIZE	0x10
> +#define PCI_FIXED_BAR_4_SIZE	0x14
> +#define PCI_FIXED_BAR_5_SIZE	0x1c
> +
> +int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
> +{
> +	int pos;
> +	u32 pcie_cap = 0, cap_data;
> +
> +	pos = PCIE_CAP_OFFSET;
> +	while (pos) {
> +		if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
> +					  devfn, pos, 4, &pcie_cap))
> +			return 0;
> +
> +		if (pcie_cap == 0xffffffff)
> +			return 0;
> +
> +		if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
> +			raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
> +					      devfn, pos + 4, 4, &cap_data);
> +			if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR)
> +				return pos;
> +		}
> +
> +		pos = pcie_cap >> 20;
> +	}
> +
> +	return 0;
> +}

Do we need to find this before we can call pci_find_ext_capability()?
Can we create pci_bus_find_ext_capability() analagous to
pci_bus_find_capability()?  I hate to see this kind of functionality
hidden in an arch file somewhere.

> +static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,
> +				   int reg, int len, u32 val, int offset)
> +{
> +	u32 size;
> +	unsigned int domain, busnum;
> +	int bar = (reg - PCI_BASE_ADDRESS_0) >> 2;
> +
> +	domain = pci_domain_nr(bus);
> +	busnum = bus->number;
> +
> +	if (val == ~0 && len == 4) {
> +		unsigned long decode;
> +
> +		raw_pci_ext_ops->read(domain, busnum, devfn,
> +				      offset + 8 + (bar * 4), 4, &size);
> +
> +		/* Turn the size into a decode pattern for the sizing code */
> +		if (size) {
> +			decode = size - 1;
> +			decode |= decode >> 1;
> +			decode |= decode >> 2;
> +			decode |= decode >> 4;
> +			decode |= decode >> 8;
> +			decode |= decode >> 16;
> +			decode++;
> +			decode = ~(decode - 1);
> +		} else {
> +			decode = ~0;
> +		}
> +
> +		/*
> +		 * If val is all ones, the core code is trying to size the reg,
> +		 * so update the mmconfig space with the real size.
> +		 *
> +		 * Note: this assumes the fixed size we got is a power of two.
> +		 */
> +		return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4,
> +					      decode);
> +	}
> +
> +	/* This is some other kind of BAR write, so just do it. */
> +	return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val);
> +}

What if we made __pci_read_base() a __weak function, and you overrode
it that way?

> +/**
> + * type1_access_ok - check whether to use type 1
> + * @bus: bus number
> + * @devfn: device & function in question
> + *
> + * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at
> + * all, the we can go ahead with any reads & writes.  If it's on a Lincroft,
> + * but doesn't exist, avoid the access altogether to keep the chip from
> + * hanging.
> + */
> +static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
> +{
> +	/* This is a workaround for A0 LNC bug where PCI status register does
> +	 * not have new CAP bit set. can not be written by SW either.
> +	 *
> +	 * PCI header type in real LNC indicates a single function device, this
> +	 * will prevent probing other devices under the same function in PCI
> +	 * shim. Therefore, use the header type in shim instead.
> +	 */
> +	if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
> +		return 0;
> +	if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
> +		return 1;
> +	return 0; /* langwell on others */
> +}
> +
> +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
> +		    int size, u32 *value)
> +{
> +	if (type1_access_ok(bus->number, devfn, where))
> +		return raw_pci_ops->read(pci_domain_nr(bus), bus->number, devfn,
> +				   where, size, value);
> +	return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
> +				     devfn, where, size, value);
> +}
> +
> +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
> +		     int size, u32 value)
> +{
> +	int offset;
> +
> +	/* On MRST, there is no PCI ROM BAR, this will cause a subsequent read
> +	 * to ROM BAR return 0 then being ignored.
> +	 */
> +	if (where == PCI_ROM_ADDRESS)
> +		return 0;
> +	/*
> +	 * Devices with fixed BARs need special handling:
> +	 *   - BAR sizing code will save, write ~0, read size, restore
> +	 *   - so writes to fixed BARs need special handling
> +	 *   - other writes to fixed BAR devices should go through mmconfig
> +	 */
> +	offset = fixed_bar_cap(bus, devfn);
> +	if (offset &&
> +	    (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) {
> +		return pci_device_update_fixed(bus, devfn, where, size, value,
> +			offset);
> +	}
> +
> +	/*
> +	 * On Moorestown update both real & mmconfig space
> +	 * Note: assumes raw_pci_ext_ops is mmconfig if we're using Lincroft
> +	 * Note 2: early Lincroft silicon can't handle type 1 accesses to
> +	 *         non-existent devices, so just eat the write in that case.
> +	 */
> +	if (type1_access_ok(bus->number, devfn, where))
> +		return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
> +				devfn, where, size, value);
> +	return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn,
> +				      where, size, value);
> +}
> +
> +struct pci_ops pci_mrst_ops = {
> +	.read = pci_read,
> +	.write = pci_write,
> +};
> +#ifdef CONFIG_MRST
> +/**
> + * pci_mrst_init - figure out if this platform should use pci_mrst_ops
> + *
> + * Moorestown has an interesting PCI implementation (see above).  This
> + * function checks whether the current platform should use MRST-style PCI
> + * config space ops or not, and sets pci_root_ops accordingly.
> + */
> +void pci_mrst_init(void)
> +{
> +	printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
> +	pci_root_ops = pci_mrst_ops;
> +}
> +#else
> +void pci_mrst_init(void) { }
> +#endif
> +
> +#ifdef CONFIG_MRST
> +/*
> + * Langwell devices reside at fixed offsets, don't try to move them.
> + */
> +static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
> +{
> +	unsigned long offset;
> +	u32 size;
> +	int i;
> +	/* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */
> +	offset = fixed_bar_cap(dev->bus, dev->devfn);
> +	if (!offset || PCI_DEVFN(2, 0) == dev->devfn ||
> +	    PCI_DEVFN(2, 2) == dev->devfn)
> +		return;
> +
> +	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
> +		pci_read_config_dword(dev, offset + 8 + (i * 4), &size);
> +		dev->resource[i].end = dev->resource[i].start + size - 1;
> +		dev->resource[i].flags |= IORESOURCE_PCI_FIXED;
> +	}
> +}
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup);
> +#endif
> +
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 72698d8..fe4d18d 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -708,6 +708,7 @@ int pci_execute_reset_function(struct pci_dev *dev);
>  void pci_update_resource(struct pci_dev *dev, int resno);
>  int __must_check pci_assign_resource(struct pci_dev *dev, int i);
>  int pci_select_bars(struct pci_dev *dev, unsigned long flags);
> +int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn);
>  
>  /* ROM control related routines */
>  int pci_enable_rom(struct pci_dev *pdev);
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index 0f71812..1a6e34a 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -2371,6 +2371,8 @@
>  #define PCI_DEVICE_ID_INTEL_82375	0x0482
>  #define PCI_DEVICE_ID_INTEL_82424	0x0483
>  #define PCI_DEVICE_ID_INTEL_82378	0x0484
> +#define PCI_DEVICE_ID_INTEL_MRST_SD0	0x0807
> +#define PCI_DEVICE_ID_INTEL_MRST_SD1	0x0808
>  #define PCI_DEVICE_ID_INTEL_I960	0x0960
>  #define PCI_DEVICE_ID_INTEL_I960RM	0x0962
>  #define PCI_DEVICE_ID_INTEL_8257X_SOL	0x1062
> @@ -2539,6 +2541,7 @@
>  #define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX	0x3b1f
>  #define PCI_DEVICE_ID_INTEL_PCH_SMBUS	0x3b30
>  #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
> +#define PCI_DEVICE_ID_INTEL_MRST_GFX	0x4102
>  #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
>  #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
>  #define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
> diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
> index 616bf8b..4b412ae 100644
> --- a/include/linux/pci_regs.h
> +++ b/include/linux/pci_regs.h
> @@ -503,6 +503,7 @@
>  #define PCI_EXT_CAP_ID_PWR	4
>  #define PCI_EXT_CAP_ID_ARI	14
>  #define PCI_EXT_CAP_ID_SRIOV	16
> +#define PCI_EXT_CAP_ID_VNDR	11

I think these should be kept in order by value ...
Greg KH June 11, 2009, 9:57 p.m. UTC | #2
On Thu, Jun 11, 2009 at 02:23:11PM -0700, Jesse Barnes wrote:
> --- a/arch/x86/pci/fixup.c
> +++ b/arch/x86/pci/fixup.c
> @@ -521,3 +521,13 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev)
>  	}
>  }
>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
> +/* Intel Moorestown graphics controller has new capabilities but the status
> + * register does not indicate that in bit 4.
> + */
> +static void __devinit pci_mrst_gfx_controller_cap(struct pci_dev *dev)
> +{
> +	/* force new cap flag */
> +	pci_write_config_byte(dev, PCI_STATUS, PCI_STATUS_CAP_LIST);
> +}
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRST_GFX,
> +			  pci_mrst_gfx_controller_cap);

As this is an "unreleased" chip, can't this be fixed up properly before
it ships?

> diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
> index 25a1f8e..1726d55 100644
> --- a/arch/x86/pci/init.c
> +++ b/arch/x86/pci/init.c
> @@ -14,6 +14,10 @@ static __init int pci_arch_init(void)
>  
>  	if (!(pci_probe & PCI_PROBE_NOEARLY))
>  		pci_mmcfg_early_init();
> +#ifdef CONFIG_PCI_MMCONFIG
> +	pci_mmcfg_late_init();
> +	pci_mrst_init();
> +#endif

Is the #ifdef really needed here?  We don't do it for
pci_mmcfg_early_init().

> +	if (!platform_has(X86_PLATFORM_FEATURE_BIOS)) {
> +		DBG(KERN_DEBUG "PCI: No BIOS, skip IRQ fixup\n");
> +		return;
> +	}

That's funny to see :)


> +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI)
> +	/* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to
> +	 * IOAPIC RTE entries, so we just enable RTE for the device.
> +	 */
> +	if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) &&
> +		!platform_has(X86_PLATFORM_FEATURE_8259) &&
> +		!platform_has(X86_PLATFORM_FEATURE_ACPI) &&
> +		!platform_has(X86_PLATFORM_FEATURE_BIOS) &&
> +		pin) {
> +		ioapic = mp_sfi_find_ioapic(dev->irq);
> +		io_apic_set_pci_routing(ioapic,
> +			dev->irq, /* IOAPIC pin */
> +			dev->irq, /* IRQ line */
> +			1, /* level trigger */
> +			1); /* polarity active low */
> +	}
> +#endif

Does there really need to be a #ifdef here?  Wouldn't the first
platform_has() catch things?  And what is CONFIG_SFI?

> diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
> index 8766b0e..4ff971a 100644
> --- a/arch/x86/pci/mmconfig-shared.c
> +++ b/arch/x86/pci/mmconfig-shared.c
> @@ -17,6 +17,7 @@
>  #include <linux/sort.h>
>  #include <asm/e820.h>
>  #include <asm/pci_x86.h>
> +#include <linux/sfi.h>

Is sfi.h in some other tree?

> -	if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
> +#ifdef CONFIG_ACPI
> +
> +	if (!acpi_disabled)
> +		return NULL;
> +#endif

Is the #ifdef really needed?

What happens if you run a CONFIG_ACPI enabled kernel on this platform?
Does it not boot properly?

Same #ifdef questions for other additions to this file, why is it
needed?

> +#ifdef CONFIG_SFI
> +		sfi_table_parse(SFI_SIG_MCFG, sfi_parse_mcfg);
> +#else
>  		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
> +#endif

Is #config needed?  What happens if SFI and ACPI are enabled in the
.config?


> +#ifndef CONFIG_SFI
>  	pci_mmcfg_reject_broken(early);
> +#endif

#ifdef needed?

> +++ b/arch/x86/pci/mrst.c
> @@ -0,0 +1,236 @@
> +/*
> + * Moorestown PCI support
> + *   Copyright (c) 2008 Intel Corporation
> + *     Jesse Barnes <jesse.barnes@intel.com>
> + *
> + * Moorestown has an interesting PCI implementation:
> + *   - configuration space is memory mapped (as defined by MCFG)
> + *   - Lincroft devices also have a real, type 1 configuration space
> + *   - Early Lincroft silicon has a type 1 access bug that will cause
> + *     a hang if non-existent devices are accessed
> + *   - some devices have the "fixed BAR" capability, which means
> + *     they can't be relocated or modified; check for that during
> + *     BAR sizing
> + *
> + * So, we use the MCFG space for all reads and writes, but also send
> + * Lincroft writes to type 1 space.  But only read/write if the device
> + * actually exists, otherwise return all 1s for reads and bit bucket
> + * the writes.
> + *
> + * Here we assume that type 1 and MCFG based access has already been set up,
> + * such that raw_pci_ops is type 1 and raw_pci_ext_ops is MCFG.
> + */
> +
> +#include <linux/sched.h>
> +#include <linux/pci.h>
> +#include <linux/ioport.h>
> +#include <linux/init.h>
> +#include <linux/dmi.h>
> +
> +#include <asm/acpi.h>
> +#include <asm/segment.h>
> +#include <asm/io.h>
> +#include <asm/smp.h>
> +#include <asm/pci_x86.h>
> +/*
> + * Yuck, we really need to add PCIe capability functions to the core
> + */
> +#define PCIE_CAP_OFFSET	0x100
> +
> +/* Fixed BAR fields */
> +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00	/* Fixed BAR (TBD) */
> +#define PCI_FIXED_BAR_0_SIZE	0x04
> +#define PCI_FIXED_BAR_1_SIZE	0x08
> +#define PCI_FIXED_BAR_2_SIZE	0x0c
> +#define PCI_FIXED_BAR_3_SIZE	0x10
> +#define PCI_FIXED_BAR_4_SIZE	0x14
> +#define PCI_FIXED_BAR_5_SIZE	0x1c
> +
> +int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
> +{
> +	int pos;
> +	u32 pcie_cap = 0, cap_data;
> +
> +	pos = PCIE_CAP_OFFSET;
> +	while (pos) {
> +		if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
> +					  devfn, pos, 4, &pcie_cap))
> +			return 0;
> +
> +		if (pcie_cap == 0xffffffff)
> +			return 0;
> +
> +		if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
> +			raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
> +					      devfn, pos + 4, 4, &cap_data);
> +			if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR)
> +				return pos;
> +		}
> +
> +		pos = pcie_cap >> 20;
> +	}
> +
> +	return 0;
> +}
> +
> +static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,
> +				   int reg, int len, u32 val, int offset)
> +{
> +	u32 size;
> +	unsigned int domain, busnum;
> +	int bar = (reg - PCI_BASE_ADDRESS_0) >> 2;
> +
> +	domain = pci_domain_nr(bus);
> +	busnum = bus->number;
> +
> +	if (val == ~0 && len == 4) {
> +		unsigned long decode;
> +
> +		raw_pci_ext_ops->read(domain, busnum, devfn,
> +				      offset + 8 + (bar * 4), 4, &size);
> +
> +		/* Turn the size into a decode pattern for the sizing code */
> +		if (size) {
> +			decode = size - 1;
> +			decode |= decode >> 1;
> +			decode |= decode >> 2;
> +			decode |= decode >> 4;
> +			decode |= decode >> 8;
> +			decode |= decode >> 16;
> +			decode++;
> +			decode = ~(decode - 1);
> +		} else {
> +			decode = ~0;
> +		}
> +
> +		/*
> +		 * If val is all ones, the core code is trying to size the reg,
> +		 * so update the mmconfig space with the real size.
> +		 *
> +		 * Note: this assumes the fixed size we got is a power of two.
> +		 */
> +		return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4,
> +					      decode);
> +	}
> +
> +	/* This is some other kind of BAR write, so just do it. */
> +	return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val);
> +}
> +
> +/**
> + * type1_access_ok - check whether to use type 1
> + * @bus: bus number
> + * @devfn: device & function in question
> + *
> + * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at
> + * all, the we can go ahead with any reads & writes.  If it's on a Lincroft,
> + * but doesn't exist, avoid the access altogether to keep the chip from
> + * hanging.
> + */
> +static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
> +{
> +	/* This is a workaround for A0 LNC bug where PCI status register does
> +	 * not have new CAP bit set. can not be written by SW either.
> +	 *
> +	 * PCI header type in real LNC indicates a single function device, this
> +	 * will prevent probing other devices under the same function in PCI
> +	 * shim. Therefore, use the header type in shim instead.
> +	 */
> +	if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
> +		return 0;
> +	if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
> +		return 1;
> +	return 0; /* langwell on others */
> +}
> +
> +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
> +		    int size, u32 *value)
> +{
> +	if (type1_access_ok(bus->number, devfn, where))
> +		return raw_pci_ops->read(pci_domain_nr(bus), bus->number, devfn,
> +				   where, size, value);
> +	return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
> +				     devfn, where, size, value);
> +}
> +
> +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
> +		     int size, u32 value)
> +{
> +	int offset;
> +
> +	/* On MRST, there is no PCI ROM BAR, this will cause a subsequent read
> +	 * to ROM BAR return 0 then being ignored.
> +	 */
> +	if (where == PCI_ROM_ADDRESS)
> +		return 0;
> +	/*
> +	 * Devices with fixed BARs need special handling:
> +	 *   - BAR sizing code will save, write ~0, read size, restore
> +	 *   - so writes to fixed BARs need special handling
> +	 *   - other writes to fixed BAR devices should go through mmconfig
> +	 */
> +	offset = fixed_bar_cap(bus, devfn);
> +	if (offset &&
> +	    (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) {
> +		return pci_device_update_fixed(bus, devfn, where, size, value,
> +			offset);
> +	}
> +
> +	/*
> +	 * On Moorestown update both real & mmconfig space
> +	 * Note: assumes raw_pci_ext_ops is mmconfig if we're using Lincroft
> +	 * Note 2: early Lincroft silicon can't handle type 1 accesses to
> +	 *         non-existent devices, so just eat the write in that case.
> +	 */
> +	if (type1_access_ok(bus->number, devfn, where))
> +		return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
> +				devfn, where, size, value);
> +	return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn,
> +				      where, size, value);
> +}
> +
> +struct pci_ops pci_mrst_ops = {
> +	.read = pci_read,
> +	.write = pci_write,
> +};
> +#ifdef CONFIG_MRST

Why would you want the other portions of this file to be enabled/built
in, if CONFIG_MRST is not set?  Could you just not build the file in if
the config option isn't enabled?

> +/**
> + * pci_mrst_init - figure out if this platform should use pci_mrst_ops
> + *
> + * Moorestown has an interesting PCI implementation (see above).  This
> + * function checks whether the current platform should use MRST-style PCI
> + * config space ops or not, and sets pci_root_ops accordingly.
> + */
> +void pci_mrst_init(void)
> +{
> +	printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
> +	pci_root_ops = pci_mrst_ops;
> +}
> +#else
> +void pci_mrst_init(void) { }
> +#endif

That looks like something that should go into a .h file.

Other than the minor structuring stuff, it looks almost sane :)

Nice job to Intel for doing the implementation this way, it could have
been so much worse.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Barnes June 11, 2009, 9:59 p.m. UTC | #3
On Thu, 11 Jun 2009 15:52:55 -0600
Matthew Wilcox <matthew@wil.cx> wrote:

> On Thu, Jun 11, 2009 at 02:23:11PM -0700, Jesse Barnes wrote:
> > +++ b/arch/x86/pci/Makefile
> > @@ -13,5 +13,5 @@ obj-$(CONFIG_X86_VISWS)		+= visws.o
> >  
> >  obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
> >  
> > -obj-y				+= common.o early.o
> > +obj-y				+= common.o early.o mrst.o
> 
> We don't want a CONFIG_PCI_MOORESTOWN, like we have for
> CONFIG_PCI_OLPC?

Sure, I suppose so.

> > +++ b/arch/x86/pci/init.c
> > @@ -14,6 +14,10 @@ static __init int pci_arch_init(void)
> >  
> >  	if (!(pci_probe & PCI_PROBE_NOEARLY))
> >  		pci_mmcfg_early_init();
> > +#ifdef CONFIG_PCI_MMCONFIG
> > +	pci_mmcfg_late_init();
> > +	pci_mrst_init();
> > +#endif
> 
> We should have empty stubs for these functions for the !MMCONFIG case.

Good point, will fix.

> > +++ b/arch/x86/pci/irq.c
> >  	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
> > +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI)
> > +	/* For platforms only have IOAPIC, the PCI irq line is 1:1
> > mapped to
> > +	 * IOAPIC RTE entries, so we just enable RTE for the
> > device.
> > +	 */
> 
> The comment is a little garbled.  Is the assumption here that all SFI
> platforms have an IOAPIC?

I think that's the case, but I'll let Jacob answer that.

> > -	if (!acpi_disabled || pci_mmcfg_config_num ||
> > mcp55_checked) +#ifdef CONFIG_ACPI
> > +
> > +	if (!acpi_disabled)
> > +		return NULL;
> > +#endif
> > +	if (pci_mmcfg_config_num || mcp55_checked)
> >  		return NULL;
> 
> acpi_disabled definitely should be 0 if CONFIG_ACPI isn't set.

Oh good, one less #ifdef.

> > -	if (!known_bridge)
> > +	if (!known_bridge) {
> > +#ifdef CONFIG_SFI
> > +		sfi_table_parse(SFI_SIG_MCFG, sfi_parse_mcfg);
> > +#else
> >  		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
> > +#endif
> > +	}
> 
> SFI and ACPI are mutually exclusive?

I think this bit was addressed in another patch actually.  And
collapsed into a single function.  I'll drop this hunk.

> > @@ -12,6 +12,7 @@
> >  #include <linux/pci.h>
> >  #include <linux/init.h>
> >  #include <linux/acpi.h>
> > +#include <linux/sfi.h>
> >  #include <asm/e820.h>
> >  #include <asm/pci_x86.h>
> >  
> 
> That's the only change to this file?  Looks stray.

Yep, messy commit.  Will drop.

> > diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
> > new file mode 100644
> > index 0000000..1d69381
> > --- /dev/null
> > +++ b/arch/x86/pci/mrst.c
> > @@ -0,0 +1,236 @@
> > +/*
> > + * Moorestown PCI support
> > + *   Copyright (c) 2008 Intel Corporation
> > + *     Jesse Barnes <jesse.barnes@intel.com>
> > + *
> > + * Moorestown has an interesting PCI implementation:
> > + *   - configuration space is memory mapped (as defined by MCFG)
> > + *   - Lincroft devices also have a real, type 1 configuration
> > space
> > + *   - Early Lincroft silicon has a type 1 access bug that will
> > cause
> > + *     a hang if non-existent devices are accessed
> > + *   - some devices have the "fixed BAR" capability, which means
> > + *     they can't be relocated or modified; check for that during
> > + *     BAR sizing
> > + *
> > + * So, we use the MCFG space for all reads and writes, but also
> > send
> > + * Lincroft writes to type 1 space.  But only read/write if the
> > device
> > + * actually exists, otherwise return all 1s for reads and bit
> > bucket
> > + * the writes.
> > + *
> > + * Here we assume that type 1 and MCFG based access has already
> > been set up,
> > + * such that raw_pci_ops is type 1 and raw_pci_ext_ops is MCFG.
> 
> How about using pci_direct_conf1 explicitly?  We can make pci_mmcfg
> non-static too.

Hm, I'll look at that.  I went back and forth on a couple of
possibilities here.

> 
> > + */
> > +
> > +#include <linux/sched.h>
> > +#include <linux/pci.h>
> > +#include <linux/ioport.h>
> > +#include <linux/init.h>
> > +#include <linux/dmi.h>
> > +
> > +#include <asm/acpi.h>
> > +#include <asm/segment.h>
> > +#include <asm/io.h>
> > +#include <asm/smp.h>
> > +#include <asm/pci_x86.h>
> > +/*
> > + * Yuck, we really need to add PCIe capability functions to the
> > core
> > + */
> > +#define PCIE_CAP_OFFSET	0x100
> 
> We've used PCI_CFG_SPACE_SIZE for this value before now ...

That would work, will fix when I move this to the core.

> > +/* Fixed BAR fields */
> > +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00	/* Fixed BAR (TBD)
> > */ +#define PCI_FIXED_BAR_0_SIZE	0x04
> > +#define PCI_FIXED_BAR_1_SIZE	0x08
> > +#define PCI_FIXED_BAR_2_SIZE	0x0c
> > +#define PCI_FIXED_BAR_3_SIZE	0x10
> > +#define PCI_FIXED_BAR_4_SIZE	0x14
> > +#define PCI_FIXED_BAR_5_SIZE	0x1c
> > +
> > +int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
> > +{
> > +	int pos;
> > +	u32 pcie_cap = 0, cap_data;
> > +
> > +	pos = PCIE_CAP_OFFSET;
> > +	while (pos) {
> > +		if (raw_pci_ext_ops->read(pci_domain_nr(bus),
> > bus->number,
> > +					  devfn, pos, 4,
> > &pcie_cap))
> > +			return 0;
> > +
> > +		if (pcie_cap == 0xffffffff)
> > +			return 0;
> > +
> > +		if (PCI_EXT_CAP_ID(pcie_cap) ==
> > PCI_EXT_CAP_ID_VNDR) {
> > +			raw_pci_ext_ops->read(pci_domain_nr(bus),
> > bus->number,
> > +					      devfn, pos + 4, 4,
> > &cap_data);
> > +			if ((cap_data & 0xffff) ==
> > PCIE_VNDR_CAP_ID_FIXED_BAR)
> > +				return pos;
> > +		}
> > +
> > +		pos = pcie_cap >> 20;
> > +	}
> > +
> > +	return 0;
> > +}
> 
> Do we need to find this before we can call pci_find_ext_capability()?
> Can we create pci_bus_find_ext_capability() analagous to
> pci_bus_find_capability()?  I hate to see this kind of functionality
> hidden in an arch file somewhere.

Agreed, I'll pull it out into generic code (as you can see by the
comment it's something I should have done already).

> > +static int pci_device_update_fixed(struct pci_bus *bus, unsigned
> > int devfn,
> > +				   int reg, int len, u32 val, int
> > offset) +{
> > +	u32 size;
> > +	unsigned int domain, busnum;
> > +	int bar = (reg - PCI_BASE_ADDRESS_0) >> 2;
> > +
> > +	domain = pci_domain_nr(bus);
> > +	busnum = bus->number;
> > +
> > +	if (val == ~0 && len == 4) {
> > +		unsigned long decode;
> > +
> > +		raw_pci_ext_ops->read(domain, busnum, devfn,
> > +				      offset + 8 + (bar * 4), 4,
> > &size); +
> > +		/* Turn the size into a decode pattern for the
> > sizing code */
> > +		if (size) {
> > +			decode = size - 1;
> > +			decode |= decode >> 1;
> > +			decode |= decode >> 2;
> > +			decode |= decode >> 4;
> > +			decode |= decode >> 8;
> > +			decode |= decode >> 16;
> > +			decode++;
> > +			decode = ~(decode - 1);
> > +		} else {
> > +			decode = ~0;
> > +		}
> > +
> > +		/*
> > +		 * If val is all ones, the core code is trying to
> > size the reg,
> > +		 * so update the mmconfig space with the real size.
> > +		 *
> > +		 * Note: this assumes the fixed size we got is a
> > power of two.
> > +		 */
> > +		return raw_pci_ext_ops->write(domain, busnum,
> > devfn, reg, 4,
> > +					      decode);
> > +	}
> > +
> > +	/* This is some other kind of BAR write, so just do it. */
> > +	return raw_pci_ext_ops->write(domain, busnum, devfn, reg,
> > len, val); +}
> 
> What if we made __pci_read_base() a __weak function, and you overrode
> it that way?

I'll look into that; might be simpler.

> > diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
> > index 616bf8b..4b412ae 100644
> > --- a/include/linux/pci_regs.h
> > +++ b/include/linux/pci_regs.h
> > @@ -503,6 +503,7 @@
> >  #define PCI_EXT_CAP_ID_PWR	4
> >  #define PCI_EXT_CAP_ID_ARI	14
> >  #define PCI_EXT_CAP_ID_SRIOV	16
> > +#define PCI_EXT_CAP_ID_VNDR	11
> 
> I think these should be kept in order by value ...

Yep, will fix.  Thanks a lot for the review.
Jacob Pan June 11, 2009, 10:01 p.m. UTC | #4
>> +++ b/arch/x86/pci/irq.c
>>  	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
>> +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI)
>> +	/* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to
>> +	 * IOAPIC RTE entries, so we just enable RTE for the device.
>> +	 */
>
>The comment is a little garbled.  Is the assumption here that all SFI
>platforms have an IOAPIC?
>
[[JPAN]] No, that is not the assumption. So we have the platform feature check below to see if there are IOAPIC exposed in SFI. The platform feature flags will be in another patch and posted soon.

>> +	if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) &&
>> +		!platform_has(X86_PLATFORM_FEATURE_8259) &&
>> +		!platform_has(X86_PLATFORM_FEATURE_ACPI) &&
>> +		!platform_has(X86_PLATFORM_FEATURE_BIOS) &&
>> +		pin) {
>> +		ioapic = mp_sfi_find_ioapic(dev->irq);
>> +		io_apic_set_pci_routing(ioapic,
>> +			dev->irq, /* IOAPIC pin */
>> +			dev->irq, /* IRQ line */
>> +			1, /* level trigger */
>> +			1); /* polarity active low */
>> +	}
>> +#endif
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Pan June 12, 2009, 2:39 p.m. UTC | #5
>-----Original Message-----
>From: Greg KH [mailto:greg@kroah.com]
>Sent: Thursday, June 11, 2009 2:58 PM
>To: Jesse Barnes
>Cc: linux-pci@vger.kernel.org; x86@kernel.org; Pan, Jacob jun
>Subject: Re: [PATCH] x86/PCI: Intel Moorestown platform "PCI" support
>
>On Thu, Jun 11, 2009 at 02:23:11PM -0700, Jesse Barnes wrote:
>> --- a/arch/x86/pci/fixup.c
>> +++ b/arch/x86/pci/fixup.c
>> @@ -521,3 +521,13 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev)
>>  	}
>>  }
>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
>> +/* Intel Moorestown graphics controller has new capabilities but the status
>> + * register does not indicate that in bit 4.
>> + */
>> +static void __devinit pci_mrst_gfx_controller_cap(struct pci_dev *dev)
>> +{
>> +	/* force new cap flag */
>> +	pci_write_config_byte(dev, PCI_STATUS, PCI_STATUS_CAP_LIST);
>> +}
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRST_GFX,
>> +			  pci_mrst_gfx_controller_cap);
>
>As this is an "unreleased" chip, can't this be fixed up properly before
>it ships?
>
[[JPAN]] I will work with the HW team to track the request. It was decided "not to fix" in the second stepping while the bug was found in the first stepping.

>
>> +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI)
>> +	/* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to
>> +	 * IOAPIC RTE entries, so we just enable RTE for the device.
>> +	 */
>> +	if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) &&
>> +		!platform_has(X86_PLATFORM_FEATURE_8259) &&
>> +		!platform_has(X86_PLATFORM_FEATURE_ACPI) &&
>> +		!platform_has(X86_PLATFORM_FEATURE_BIOS) &&
>> +		pin) {
>> +		ioapic = mp_sfi_find_ioapic(dev->irq);
>> +		io_apic_set_pci_routing(ioapic,
>> +			dev->irq, /* IOAPIC pin */
>> +			dev->irq, /* IRQ line */
>> +			1, /* level trigger */
>> +			1); /* polarity active low */
>> +	}
>> +#endif
>
>Does there really need to be a #ifdef here?  Wouldn't the first
>platform_has() catch things?  And what is CONFIG_SFI?
>
[[JPAN]] I agree, platform_has() check should be sufficient. Perhaps we should have a dummy io_apic_set_pci_routing() when CONFIG_X86_IO_APIC is not defined?

SFI is the Simple Firmware Interface, it provides static system tables similar to ACPI. System devices such as APIC and timers are included in the SFI tables.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Greg KH June 12, 2009, 3:47 p.m. UTC | #6
On Fri, Jun 12, 2009 at 07:39:11AM -0700, Pan, Jacob jun wrote:
> >> +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI)
> >> +	/* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to
> >> +	 * IOAPIC RTE entries, so we just enable RTE for the device.
> >> +	 */
> >> +	if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) &&
> >> +		!platform_has(X86_PLATFORM_FEATURE_8259) &&
> >> +		!platform_has(X86_PLATFORM_FEATURE_ACPI) &&
> >> +		!platform_has(X86_PLATFORM_FEATURE_BIOS) &&
> >> +		pin) {
> >> +		ioapic = mp_sfi_find_ioapic(dev->irq);
> >> +		io_apic_set_pci_routing(ioapic,
> >> +			dev->irq, /* IOAPIC pin */
> >> +			dev->irq, /* IRQ line */
> >> +			1, /* level trigger */
> >> +			1); /* polarity active low */
> >> +	}
> >> +#endif
> >
> >Does there really need to be a #ifdef here?  Wouldn't the first
> >platform_has() catch things?  And what is CONFIG_SFI?
> >
> [[JPAN]] I agree, platform_has() check should be sufficient. Perhaps
> we should have a dummy io_apic_set_pci_routing() when
> CONFIG_X86_IO_APIC is not defined?

Yes, that would be a good idea to have.

> SFI is the Simple Firmware Interface, it provides static system tables
> similar to ACPI. System devices such as APIC and timers are included
> in the SFI tables.

But I do not see CONFIG_SFI or the sfi.h header file in Linus's kernel
tree.  Has it been submitted to some subsystem tree already?

And can you build a single kernel image that supports SFI and ACPI?  It
looks from this patch that this is not the case, which is not a good
thing for the distros at all.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Barnes July 15, 2009, 7:13 p.m. UTC | #7
Was just refreshing this patch now that some of the other stuff it
needs has started to land.  I'll be pushing it to a mrst branch in my
PCI tree soon for testing & more review.

On Thu, 11 Jun 2009 14:59:45 -0700
Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > > + * Here we assume that type 1 and MCFG based access has already
> > > been set up,
> > > + * such that raw_pci_ops is type 1 and raw_pci_ext_ops is MCFG.
> > 
> > How about using pci_direct_conf1 explicitly?  We can make pci_mmcfg
> > non-static too.
> 
> Hm, I'll look at that.  I went back and forth on a couple of
> possibilities here.

I think I can do this, it would make things clearer.


> > Do we need to find this before we can call
> > pci_find_ext_capability()? Can we create
> > pci_bus_find_ext_capability() analagous to
> > pci_bus_find_capability()?  I hate to see this kind of
> > functionality hidden in an arch file somewhere.
> 
> Agreed, I'll pull it out into generic code (as you can see by the
> comment it's something I should have done already).

Started looking at doing this, and remembered why I needed the funky
code.  At one point I was using the fixed bar check on the read side,
so if I used generic code to look for the cap I'd end up with infinite
recursion: pci_read->fixed_bar_cap->pci_read->...

However I don't need that anymore, so it should be safe to use a
generic routine here (as long as the cap walking code never does a
write! :)

That said I think I would need a pci_bus variant, since I'll be using
it from pci_write...

I'm addressing the rest of the comments now, will post a new patch
shortly (though still missing a few of the generic bits that are still
being pushed, like SFI and platform feature bits).
diff mbox

Patch

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index d49202e..7b0cc85 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -13,5 +13,5 @@  obj-$(CONFIG_X86_VISWS)		+= visws.o
 
 obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 
-obj-y				+= common.o early.o
+obj-y				+= common.o early.o mrst.o
 obj-y				+= amd_bus.o
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6dd8955..eb1586f 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -521,3 +521,13 @@  static void sb600_disable_hpet_bar(struct pci_dev *dev)
 	}
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
+/* Intel Moorestown graphics controller has new capabilities but the status
+ * register does not indicate that in bit 4.
+ */
+static void __devinit pci_mrst_gfx_controller_cap(struct pci_dev *dev)
+{
+	/* force new cap flag */
+	pci_write_config_byte(dev, PCI_STATUS, PCI_STATUS_CAP_LIST);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRST_GFX,
+			  pci_mrst_gfx_controller_cap);
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index 25a1f8e..1726d55 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -14,6 +14,10 @@  static __init int pci_arch_init(void)
 
 	if (!(pci_probe & PCI_PROBE_NOEARLY))
 		pci_mmcfg_early_init();
+#ifdef CONFIG_PCI_MMCONFIG
+	pci_mmcfg_late_init();
+	pci_mrst_init();
+#endif
 
 #ifdef CONFIG_PCI_OLPC
 	if (!pci_olpc_init())
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index fecbce6..2931756 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -14,6 +14,7 @@ 
 #include <linux/io.h>
 #include <linux/smp.h>
 #include <asm/io_apic.h>
+#include <asm/platform_feature.h>
 #include <linux/irq.h>
 #include <linux/acpi.h>
 #include <asm/pci_x86.h>
@@ -1019,6 +1020,10 @@  static void __init pcibios_fixup_irqs(void)
 	u8 pin;
 
 	DBG(KERN_DEBUG "PCI: IRQ fixup\n");
+	if (!platform_has(X86_PLATFORM_FEATURE_BIOS)) {
+		DBG(KERN_DEBUG "PCI: No BIOS, skip IRQ fixup\n");
+		return;
+	}
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		/*
 		 * If the BIOS has set an out of range IRQ number, just
@@ -1214,8 +1219,26 @@  static int pirq_enable_irq(struct pci_dev *dev)
 {
 	u8 pin;
 	struct pci_dev *temp_dev;
+	int ioapic;
 
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI)
+	/* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to
+	 * IOAPIC RTE entries, so we just enable RTE for the device.
+	 */
+	if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) &&
+		!platform_has(X86_PLATFORM_FEATURE_8259) &&
+		!platform_has(X86_PLATFORM_FEATURE_ACPI) &&
+		!platform_has(X86_PLATFORM_FEATURE_BIOS) &&
+		pin) {
+		ioapic = mp_sfi_find_ioapic(dev->irq);
+		io_apic_set_pci_routing(ioapic,
+			dev->irq, /* IOAPIC pin */
+			dev->irq, /* IRQ line */
+			1, /* level trigger */
+			1); /* polarity active low */
+	}
+#endif
 	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
 		char *msg = "";
 
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 8766b0e..4ff971a 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -17,6 +17,7 @@ 
 #include <linux/sort.h>
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
+#include <linux/sfi.h>
 
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN	(2 * 1024*1024)
@@ -186,7 +187,12 @@  static const char __init *pci_mmcfg_nvidia_mcp55(void)
 	/*
 	 * do check if amd fam10h already took over
 	 */
-	if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
+#ifdef CONFIG_ACPI
+
+	if (!acpi_disabled)
+		return NULL;
+#endif
+	if (pci_mmcfg_config_num || mcp55_checked)
 		return NULL;
 
 	mcp55_checked = true;
@@ -362,6 +368,7 @@  static void __init pci_mmcfg_insert_resources(void)
 	pci_mmcfg_resources_inserted = 1;
 }
 
+#ifdef CONFIG_ACPI
 static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
 					      void *data)
 {
@@ -429,6 +436,7 @@  static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
 
 	return mcfg_res.flags;
 }
+#endif
 
 typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
 
@@ -491,9 +499,10 @@  static void __init pci_mmcfg_reject_broken(int early)
 		       (unsigned int)cfg->start_bus_number,
 		       (unsigned int)cfg->end_bus_number);
 
+#ifdef CONFIG_ACPI
 		if (!early)
 			valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
-
+#endif
 		if (valid)
 			continue;
 
@@ -542,10 +551,17 @@  static void __init __pci_mmcfg_init(int early)
 			known_bridge = 1;
 	}
 
-	if (!known_bridge)
+	if (!known_bridge) {
+#ifdef CONFIG_SFI
+		sfi_table_parse(SFI_SIG_MCFG, sfi_parse_mcfg);
+#else
 		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
+#endif
+	}
 
+#ifndef CONFIG_SFI
 	pci_mmcfg_reject_broken(early);
+#endif
 
 	if ((pci_mmcfg_config_num == 0) ||
 	    (pci_mmcfg_config == NULL) ||
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 8b2d561..ee9373a 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -12,6 +12,7 @@ 
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/sfi.h>
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
 
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
new file mode 100644
index 0000000..1d69381
--- /dev/null
+++ b/arch/x86/pci/mrst.c
@@ -0,0 +1,236 @@ 
+/*
+ * Moorestown PCI support
+ *   Copyright (c) 2008 Intel Corporation
+ *     Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Moorestown has an interesting PCI implementation:
+ *   - configuration space is memory mapped (as defined by MCFG)
+ *   - Lincroft devices also have a real, type 1 configuration space
+ *   - Early Lincroft silicon has a type 1 access bug that will cause
+ *     a hang if non-existent devices are accessed
+ *   - some devices have the "fixed BAR" capability, which means
+ *     they can't be relocated or modified; check for that during
+ *     BAR sizing
+ *
+ * So, we use the MCFG space for all reads and writes, but also send
+ * Lincroft writes to type 1 space.  But only read/write if the device
+ * actually exists, otherwise return all 1s for reads and bit bucket
+ * the writes.
+ *
+ * Here we assume that type 1 and MCFG based access has already been set up,
+ * such that raw_pci_ops is type 1 and raw_pci_ext_ops is MCFG.
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+
+#include <asm/acpi.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/pci_x86.h>
+/*
+ * Yuck, we really need to add PCIe capability functions to the core
+ */
+#define PCIE_CAP_OFFSET	0x100
+
+/* Fixed BAR fields */
+#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00	/* Fixed BAR (TBD) */
+#define PCI_FIXED_BAR_0_SIZE	0x04
+#define PCI_FIXED_BAR_1_SIZE	0x08
+#define PCI_FIXED_BAR_2_SIZE	0x0c
+#define PCI_FIXED_BAR_3_SIZE	0x10
+#define PCI_FIXED_BAR_4_SIZE	0x14
+#define PCI_FIXED_BAR_5_SIZE	0x1c
+
+int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
+{
+	int pos;
+	u32 pcie_cap = 0, cap_data;
+
+	pos = PCIE_CAP_OFFSET;
+	while (pos) {
+		if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
+					  devfn, pos, 4, &pcie_cap))
+			return 0;
+
+		if (pcie_cap == 0xffffffff)
+			return 0;
+
+		if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
+			raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
+					      devfn, pos + 4, 4, &cap_data);
+			if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR)
+				return pos;
+		}
+
+		pos = pcie_cap >> 20;
+	}
+
+	return 0;
+}
+
+static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,
+				   int reg, int len, u32 val, int offset)
+{
+	u32 size;
+	unsigned int domain, busnum;
+	int bar = (reg - PCI_BASE_ADDRESS_0) >> 2;
+
+	domain = pci_domain_nr(bus);
+	busnum = bus->number;
+
+	if (val == ~0 && len == 4) {
+		unsigned long decode;
+
+		raw_pci_ext_ops->read(domain, busnum, devfn,
+				      offset + 8 + (bar * 4), 4, &size);
+
+		/* Turn the size into a decode pattern for the sizing code */
+		if (size) {
+			decode = size - 1;
+			decode |= decode >> 1;
+			decode |= decode >> 2;
+			decode |= decode >> 4;
+			decode |= decode >> 8;
+			decode |= decode >> 16;
+			decode++;
+			decode = ~(decode - 1);
+		} else {
+			decode = ~0;
+		}
+
+		/*
+		 * If val is all ones, the core code is trying to size the reg,
+		 * so update the mmconfig space with the real size.
+		 *
+		 * Note: this assumes the fixed size we got is a power of two.
+		 */
+		return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4,
+					      decode);
+	}
+
+	/* This is some other kind of BAR write, so just do it. */
+	return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val);
+}
+
+/**
+ * type1_access_ok - check whether to use type 1
+ * @bus: bus number
+ * @devfn: device & function in question
+ *
+ * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at
+ * all, the we can go ahead with any reads & writes.  If it's on a Lincroft,
+ * but doesn't exist, avoid the access altogether to keep the chip from
+ * hanging.
+ */
+static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
+{
+	/* This is a workaround for A0 LNC bug where PCI status register does
+	 * not have new CAP bit set. can not be written by SW either.
+	 *
+	 * PCI header type in real LNC indicates a single function device, this
+	 * will prevent probing other devices under the same function in PCI
+	 * shim. Therefore, use the header type in shim instead.
+	 */
+	if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
+		return 0;
+	if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
+		return 1;
+	return 0; /* langwell on others */
+}
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+		    int size, u32 *value)
+{
+	if (type1_access_ok(bus->number, devfn, where))
+		return raw_pci_ops->read(pci_domain_nr(bus), bus->number, devfn,
+				   where, size, value);
+	return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
+				     devfn, where, size, value);
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+		     int size, u32 value)
+{
+	int offset;
+
+	/* On MRST, there is no PCI ROM BAR, this will cause a subsequent read
+	 * to ROM BAR return 0 then being ignored.
+	 */
+	if (where == PCI_ROM_ADDRESS)
+		return 0;
+	/*
+	 * Devices with fixed BARs need special handling:
+	 *   - BAR sizing code will save, write ~0, read size, restore
+	 *   - so writes to fixed BARs need special handling
+	 *   - other writes to fixed BAR devices should go through mmconfig
+	 */
+	offset = fixed_bar_cap(bus, devfn);
+	if (offset &&
+	    (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) {
+		return pci_device_update_fixed(bus, devfn, where, size, value,
+			offset);
+	}
+
+	/*
+	 * On Moorestown update both real & mmconfig space
+	 * Note: assumes raw_pci_ext_ops is mmconfig if we're using Lincroft
+	 * Note 2: early Lincroft silicon can't handle type 1 accesses to
+	 *         non-existent devices, so just eat the write in that case.
+	 */
+	if (type1_access_ok(bus->number, devfn, where))
+		return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+				devfn, where, size, value);
+	return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn,
+				      where, size, value);
+}
+
+struct pci_ops pci_mrst_ops = {
+	.read = pci_read,
+	.write = pci_write,
+};
+#ifdef CONFIG_MRST
+/**
+ * pci_mrst_init - figure out if this platform should use pci_mrst_ops
+ *
+ * Moorestown has an interesting PCI implementation (see above).  This
+ * function checks whether the current platform should use MRST-style PCI
+ * config space ops or not, and sets pci_root_ops accordingly.
+ */
+void pci_mrst_init(void)
+{
+	printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
+	pci_root_ops = pci_mrst_ops;
+}
+#else
+void pci_mrst_init(void) { }
+#endif
+
+#ifdef CONFIG_MRST
+/*
+ * Langwell devices reside at fixed offsets, don't try to move them.
+ */
+static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
+{
+	unsigned long offset;
+	u32 size;
+	int i;
+	/* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */
+	offset = fixed_bar_cap(dev->bus, dev->devfn);
+	if (!offset || PCI_DEVFN(2, 0) == dev->devfn ||
+	    PCI_DEVFN(2, 2) == dev->devfn)
+		return;
+
+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+		pci_read_config_dword(dev, offset + 8 + (i * 4), &size);
+		dev->resource[i].end = dev->resource[i].start + size - 1;
+		dev->resource[i].flags |= IORESOURCE_PCI_FIXED;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup);
+#endif
+
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 72698d8..fe4d18d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -708,6 +708,7 @@  int pci_execute_reset_function(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
+int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn);
 
 /* ROM control related routines */
 int pci_enable_rom(struct pci_dev *pdev);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f71812..1a6e34a 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2371,6 +2371,8 @@ 
 #define PCI_DEVICE_ID_INTEL_82375	0x0482
 #define PCI_DEVICE_ID_INTEL_82424	0x0483
 #define PCI_DEVICE_ID_INTEL_82378	0x0484
+#define PCI_DEVICE_ID_INTEL_MRST_SD0	0x0807
+#define PCI_DEVICE_ID_INTEL_MRST_SD1	0x0808
 #define PCI_DEVICE_ID_INTEL_I960	0x0960
 #define PCI_DEVICE_ID_INTEL_I960RM	0x0962
 #define PCI_DEVICE_ID_INTEL_8257X_SOL	0x1062
@@ -2539,6 +2541,7 @@ 
 #define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX	0x3b1f
 #define PCI_DEVICE_ID_INTEL_PCH_SMBUS	0x3b30
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
+#define PCI_DEVICE_ID_INTEL_MRST_GFX	0x4102
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
 #define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 616bf8b..4b412ae 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -503,6 +503,7 @@ 
 #define PCI_EXT_CAP_ID_PWR	4
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_SRIOV	16
+#define PCI_EXT_CAP_ID_VNDR	11
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */