Message ID | 1374686569-2376-2-git-send-email-jbarnes@virtuousgeek.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jul 24, 2013 at 10:22:49AM -0700, Jesse Barnes wrote: > Systems with Intel graphics controllers set aside memory exclusively for > gfx driver use. This memory is not marked in the E820 as reserved or as > RAM, and so is subject to overlap from E820 manipulation later in the > boot process. On some systems, MMIO space is allocated on top, despite > the efforts of the "RAM buffer" approach, which simply rounds memory > boundaries up to 64M to try to catch space that may decode as RAM and so > is not suitable for MMIO. > > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> > --- > arch/x86/include/asm/pci-direct.h | 1 + > arch/x86/kernel/early-quirks.c | 150 +++++++++++++++++++++++++++++++++++++ > arch/x86/pci/early.c | 8 ++ > include/drm/i915_drm.h | 18 +++++ > 4 files changed, 177 insertions(+) > > diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h > index b1e7a45..a1dc283 100644 > --- a/arch/x86/include/asm/pci-direct.h > +++ b/arch/x86/include/asm/pci-direct.h > @@ -9,6 +9,7 @@ > extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); > extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); > extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); > +extern u32 read_pci_config_32(u8 bus, u8 slot, u8 func, u8 offset); > extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); > extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); > extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); > diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c > index 94ab6b9..208ed8d 100644 > --- a/arch/x86/kernel/early-quirks.c > +++ b/arch/x86/kernel/early-quirks.c > @@ -12,6 +12,7 @@ > #include <linux/pci.h> > #include <linux/acpi.h> > #include <linux/pci_ids.h> > +#include <drm/i915_drm.h> > #include <asm/pci-direct.h> > #include <asm/dma.h> > #include <asm/io_apic.h> > @@ -208,6 +209,153 @@ static void __init intel_remapping_check(int num, int slot, int func) > > } > > +/* > + * Systems with Intel graphics controllers set aside memory exclusively > + * for gfx driver use. This memory is not marked in the E820 as reserved > + * or as RAM, and so is subject to overlap from E820 manipulation later > + * in the boot process. On some systems, MMIO space is allocated on top, > + * despite the efforts of the "RAM buffer" approach, which simply rounds > + * memory boundaries up to 64M to try to catch space that may decode > + * as RAM and so is not suitable for MMIO. > + * > + * And yes, so far on current devices the base addr is always under 4G. > + */ And for the indefinite future. > +static u32 __init intel_stolen_base(int num, int slot, int func) > +{ > + u32 base; > + > + /* > + * Almost universally we can find the Graphics Base of Stolen Memory > + * at offset 0x5c in the igfx configuration space. On a few (desktop) > + * machines this is also mirrored in the bridge device at different > + * locations, or in the MCHBAR. > + */ > + base = read_pci_config_32(num, slot, func, 0x5c); > + base &= ~((1<<20) - 1); I wish there was some synatic sugar we can apply for base &= -MB(1); > + > + return base; > +} > + > +#define KB(x) ((x) * 1024) > +#define MB(x) (KB (KB (x))) > +#define GB(x) (MB (KB (x))) > + > +static size_t __init intel_stolen_size(int num, int slot, int func) SNB+ has a different defintion. (And IVB has multiple definitions in the bspec, but we've found the SNB one works best...) > +static struct pci_device_id intel_stolen_ids[] __initdata = { const? Or is initdata already applying that fixup? > +static void __init intel_graphics_stolen(int num, int slot, int func) > +{ > + size_t size; > + int i; > + u32 start; > + u16 device, subvendor, subdevice; > + > + device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); > + subvendor = read_pci_config_16(num, slot, func, > + PCI_SUBSYSTEM_VENDOR_ID); > + subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID); > + > + for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) { > + if (intel_stolen_ids[i].device == device && > + (intel_stolen_ids[i].subvendor == PCI_ANY_ID || > + intel_stolen_ids[i].subvendor == subvendor) && > + (intel_stolen_ids[i].subdevice == PCI_ANY_ID || > + intel_stolen_ids[i].subdevice == subdevice)) { > + size = intel_stolen_size(num, slot, func); See above comments for needing to split this on before/after SNB. And wouldn't it be cool to unifying the SNB quirk dispatch with this table? Is that even possible? > + start = intel_stolen_base(num, slot, func); > + if (size && start) > + goto found; > + else > + break; > + } > + } > + > + /* No match or invalid data, don't bother reserving */ > + return; > +found: > + /* Mark this space as reserved */ > + e820_add_region(start, size, E820_RESERVED); > + return; > +} > + > #define QFLAG_APPLY_ONCE 0x1 > #define QFLAG_APPLIED 0x2 > #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) > @@ -241,6 +389,8 @@ static struct chipset early_qrk[] __initdata = { > PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, > { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, > PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, > + { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, > + QFLAG_APPLY_ONCE, intel_graphics_stolen }, > {} > }; > > diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c > index d1067d5..a3fc44c 100644 > --- a/arch/x86/pci/early.c > +++ b/arch/x86/pci/early.c > @@ -31,6 +31,14 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) > return v; > } > > +u32 read_pci_config_32(u8 bus, u8 slot, u8 func, u8 offset) > +{ > + u32 v; > + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); > + v = inl(0xcfc); > + return v; > +} This is just read_pci_config(). -Chris
On Wed, 24 Jul 2013 18:59:02 +0100 Chris Wilson <chris@chris-wilson.co.uk> wrote: > > + * And yes, so far on current devices the base addr is always under 4G. > > + */ > > And for the indefinite future. Yeah going to 64 bits will require code changes too, I just wanted to head off that inevitable comment. > > + base = read_pci_config_32(num, slot, func, 0x5c); > > + base &= ~((1<<20) - 1); > > I wish there was some synatic sugar we can apply for base &= -MB(1); I think it's clear enough. Or is this a snarky comment that kernel.h has a macro for this you're not mentioning? :) > > + > > + return base; > > +} > > + > > +#define KB(x) ((x) * 1024) > > +#define MB(x) (KB (KB (x))) > > +#define GB(x) (MB (KB (x))) > > + > > +static size_t __init intel_stolen_size(int num, int slot, int func) > > SNB+ has a different defintion. (And IVB has multiple definitions in the > bspec, but we've found the SNB one works best...) > > > +static struct pci_device_id intel_stolen_ids[] __initdata = { > > const? Or is initdata already applying that fixup? const and __initdata don't get along because then the compiler doesn't know which section to put the array in. > > > +static void __init intel_graphics_stolen(int num, int slot, int func) > > +{ > > + size_t size; > > + int i; > > + u32 start; > > + u16 device, subvendor, subdevice; > > + > > + device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); > > + subvendor = read_pci_config_16(num, slot, func, > > + PCI_SUBSYSTEM_VENDOR_ID); > > + subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID); > > + > > + for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) { > > + if (intel_stolen_ids[i].device == device && > > + (intel_stolen_ids[i].subvendor == PCI_ANY_ID || > > + intel_stolen_ids[i].subvendor == subvendor) && > > + (intel_stolen_ids[i].subdevice == PCI_ANY_ID || > > + intel_stolen_ids[i].subdevice == subdevice)) { > > + size = intel_stolen_size(num, slot, func); > > See above comments for needing to split this on before/after SNB. And > wouldn't it be cool to unifying the SNB quirk dispatch with this table? > Is that even possible? I looked at it, there's not much in common aside from the PCI IDs. The SNB reservation occurs after the memory allocator is initialized (so we can steal the pages). It may be possible to do it earlier along with this code, but I'd say that's a separate patch either way. > > > + start = intel_stolen_base(num, slot, func); > > + if (size && start) > > + goto found; > > + else > > + break; > > + } > > + } > > + > > + /* No match or invalid data, don't bother reserving */ > > + return; > > +found: > > + /* Mark this space as reserved */ > > + e820_add_region(start, size, E820_RESERVED); > > + return; > > +} > > + > > #define QFLAG_APPLY_ONCE 0x1 > > #define QFLAG_APPLIED 0x2 > > #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) > > @@ -241,6 +389,8 @@ static struct chipset early_qrk[] __initdata = { > > PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, > > { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, > > PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, > > + { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, > > + QFLAG_APPLY_ONCE, intel_graphics_stolen }, > > {} > > }; > > > > diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c > > index d1067d5..a3fc44c 100644 > > --- a/arch/x86/pci/early.c > > +++ b/arch/x86/pci/early.c > > @@ -31,6 +31,14 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) > > return v; > > } > > > > +u32 read_pci_config_32(u8 bus, u8 slot, u8 func, u8 offset) > > +{ > > + u32 v; > > + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); > > + v = inl(0xcfc); > > + return v; > > +} > > This is just read_pci_config(). Oops missed it probably due to the consistent naming (_byte, _16, maybe we should add a _24). Will drop that bit.
On Wed, Jul 24, 2013 at 11:09:18AM -0700, Jesse Barnes wrote: > On Wed, 24 Jul 2013 18:59:02 +0100 > Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > + base = read_pci_config_32(num, slot, func, 0x5c); > > > + base &= ~((1<<20) - 1); > > > > I wish there was some synatic sugar we can apply for base &= -MB(1); > > I think it's clear enough. Or is this a snarky comment that kernel.h > has a macro for this you're not mentioning? :) I was being sincere! It's an idiom that crops up often enough, so I wondering aloud if someone had a neat solution. -Chris
diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h index b1e7a45..a1dc283 100644 --- a/arch/x86/include/asm/pci-direct.h +++ b/arch/x86/include/asm/pci-direct.h @@ -9,6 +9,7 @@ extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); +extern u32 read_pci_config_32(u8 bus, u8 slot, u8 func, u8 offset); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 94ab6b9..208ed8d 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -12,6 +12,7 @@ #include <linux/pci.h> #include <linux/acpi.h> #include <linux/pci_ids.h> +#include <drm/i915_drm.h> #include <asm/pci-direct.h> #include <asm/dma.h> #include <asm/io_apic.h> @@ -208,6 +209,153 @@ static void __init intel_remapping_check(int num, int slot, int func) } +/* + * Systems with Intel graphics controllers set aside memory exclusively + * for gfx driver use. This memory is not marked in the E820 as reserved + * or as RAM, and so is subject to overlap from E820 manipulation later + * in the boot process. On some systems, MMIO space is allocated on top, + * despite the efforts of the "RAM buffer" approach, which simply rounds + * memory boundaries up to 64M to try to catch space that may decode + * as RAM and so is not suitable for MMIO. + * + * And yes, so far on current devices the base addr is always under 4G. + */ +static u32 __init intel_stolen_base(int num, int slot, int func) +{ + u32 base; + + /* + * Almost universally we can find the Graphics Base of Stolen Memory + * at offset 0x5c in the igfx configuration space. On a few (desktop) + * machines this is also mirrored in the bridge device at different + * locations, or in the MCHBAR. + */ + base = read_pci_config_32(num, slot, func, 0x5c); + base &= ~((1<<20) - 1); + + return base; +} + +#define KB(x) ((x) * 1024) +#define MB(x) (KB (KB (x))) +#define GB(x) (MB (KB (x))) + +static size_t __init intel_stolen_size(int num, int slot, int func) +{ + size_t stolen_size; + u16 gmch_ctrl; + + gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL); + + switch (gmch_ctrl & I855_GMCH_GMS_MASK) { + case I855_GMCH_GMS_STOLEN_1M: + stolen_size = MB(1); + break; + case I855_GMCH_GMS_STOLEN_4M: + stolen_size = MB(4); + break; + case I855_GMCH_GMS_STOLEN_8M: + stolen_size = MB(8); + break; + case I855_GMCH_GMS_STOLEN_16M: + stolen_size = MB(16); + break; + case I855_GMCH_GMS_STOLEN_32M: + stolen_size = MB(32); + break; + case I915_GMCH_GMS_STOLEN_48M: + stolen_size = MB(48); + break; + case I915_GMCH_GMS_STOLEN_64M: + stolen_size = MB(64); + break; + case G33_GMCH_GMS_STOLEN_128M: + stolen_size = MB(128); + break; + case G33_GMCH_GMS_STOLEN_256M: + stolen_size = MB(256); + break; + case INTEL_GMCH_GMS_STOLEN_96M: + stolen_size = MB(96); + break; + case INTEL_GMCH_GMS_STOLEN_160M: + stolen_size = MB(160); + break; + case INTEL_GMCH_GMS_STOLEN_224M: + stolen_size = MB(224); + break; + case INTEL_GMCH_GMS_STOLEN_352M: + stolen_size = MB(352); + break; + default: + stolen_size = 0; + break; + } + + return stolen_size; +} + +static struct pci_device_id intel_stolen_ids[] __initdata = { + INTEL_I85X_IDS(NULL), + INTEL_I865G_IDS(NULL), + INTEL_I915G_IDS(NULL), + INTEL_I915GM_IDS(NULL), + INTEL_I945G_IDS(NULL), + INTEL_I945GM_IDS(NULL), + INTEL_VLV_M_IDS(NULL), + INTEL_VLV_D_IDS(NULL), + INTEL_PINEVIEW_IDS(NULL), + INTEL_I965G_IDS(NULL), + INTEL_G33_IDS(NULL), + INTEL_I965GM_IDS(NULL), + INTEL_GM45_IDS(NULL), + INTEL_G45_IDS(NULL), + INTEL_IRONLAKE_D_IDS(NULL), + INTEL_IRONLAKE_M_IDS(NULL), + INTEL_SNB_D_IDS(NULL), + INTEL_SNB_M_IDS(NULL), + INTEL_IVB_M_IDS(NULL), + INTEL_IVB_D_IDS(NULL), + INTEL_IVB_Q_IDS(NULL), + INTEL_HSW_D_IDS(NULL), + INTEL_HSW_M_IDS(NULL), +}; + +static void __init intel_graphics_stolen(int num, int slot, int func) +{ + size_t size; + int i; + u32 start; + u16 device, subvendor, subdevice; + + device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); + subvendor = read_pci_config_16(num, slot, func, + PCI_SUBSYSTEM_VENDOR_ID); + subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID); + + for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) { + if (intel_stolen_ids[i].device == device && + (intel_stolen_ids[i].subvendor == PCI_ANY_ID || + intel_stolen_ids[i].subvendor == subvendor) && + (intel_stolen_ids[i].subdevice == PCI_ANY_ID || + intel_stolen_ids[i].subdevice == subdevice)) { + size = intel_stolen_size(num, slot, func); + start = intel_stolen_base(num, slot, func); + if (size && start) + goto found; + else + break; + } + } + + /* No match or invalid data, don't bother reserving */ + return; +found: + /* Mark this space as reserved */ + e820_add_region(start, size, E820_RESERVED); + return; +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -241,6 +389,8 @@ static struct chipset early_qrk[] __initdata = { PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, + { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, + QFLAG_APPLY_ONCE, intel_graphics_stolen }, {} }; diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index d1067d5..a3fc44c 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -31,6 +31,14 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) return v; } +u32 read_pci_config_32(u8 bus, u8 slot, u8 func, u8 offset) +{ + u32 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inl(0xcfc); + return v; +} + void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val) { diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index dc480f5..bcea925 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -35,6 +35,24 @@ extern bool i915_gpu_lower(void); extern bool i915_gpu_busy(void); extern bool i915_gpu_turbo_disable(void); +#define I830_GMCH_CTRL 0x52 + +#define I855_GMCH_GMS_MASK 0xF0 +#define I855_GMCH_GMS_STOLEN_0M 0x0 +#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) +#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) +#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4) +#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4) +#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4) +#define I915_GMCH_GMS_STOLEN_48M (0x6 << 4) +#define I915_GMCH_GMS_STOLEN_64M (0x7 << 4) +#define G33_GMCH_GMS_STOLEN_128M (0x8 << 4) +#define G33_GMCH_GMS_STOLEN_256M (0x9 << 4) +#define INTEL_GMCH_GMS_STOLEN_96M (0xa << 4) +#define INTEL_GMCH_GMS_STOLEN_160M (0xb << 4) +#define INTEL_GMCH_GMS_STOLEN_224M (0xc << 4) +#define INTEL_GMCH_GMS_STOLEN_352M (0xd << 4) + #define INTEL_VGA_DEVICE(id, info) { \ .class = PCI_BASE_CLASS_DISPLAY << 16, \ .class_mask = 0xff0000, \
Systems with Intel graphics controllers set aside memory exclusively for gfx driver use. This memory is not marked in the E820 as reserved or as RAM, and so is subject to overlap from E820 manipulation later in the boot process. On some systems, MMIO space is allocated on top, despite the efforts of the "RAM buffer" approach, which simply rounds memory boundaries up to 64M to try to catch space that may decode as RAM and so is not suitable for MMIO. Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> --- arch/x86/include/asm/pci-direct.h | 1 + arch/x86/kernel/early-quirks.c | 150 +++++++++++++++++++++++++++++++++++++ arch/x86/pci/early.c | 8 ++ include/drm/i915_drm.h | 18 +++++ 4 files changed, 177 insertions(+)