diff mbox

[v2,04/10] drm/nouveau/fb: add GK20A support

Message ID 1398060142-7937-5-git-send-email-acourbot@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alexandre Courbot April 21, 2014, 6:02 a.m. UTC
Add a simple FB device for GK20A, as well as a RAM implementation based
on contiguous DMA memory allocations suitable for chips that use system
memory as video RAM.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
---
 drivers/gpu/drm/nouveau/Makefile                  |   2 +
 drivers/gpu/drm/nouveau/core/include/subdev/fb.h  |   1 +
 drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c    |  56 ++++++++
 drivers/gpu/drm/nouveau/core/subdev/fb/priv.h     |   1 +
 drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c | 168 ++++++++++++++++++++++
 5 files changed, 228 insertions(+)
 create mode 100644 drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c
 create mode 100644 drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c

Comments

Thierry Reding April 22, 2014, 10:40 a.m. UTC | #1
On Mon, Apr 21, 2014 at 03:02:16PM +0900, Alexandre Courbot wrote:
[...]
> diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
[...]
> +		pages = dma_alloc_from_contiguous(dev, ncmin, order);
> +		if (!pages) {
> +			gk20a_ram_put(pfb, &mem);
> +			return -ENOMEM;
> +		}
> +
> +		dma_addr = pfn_to_dma(nv_device_base(nv_device(pfb)),
> +				      page_to_pfn(pages));

This breaks compilation on x86 because neither pfn_to_dma() nor
dma_to_pfn() are available. Is there some other way this can be
allocated so that these functions don't need to be called?

Thierry
Alexandre Courbot April 23, 2014, 2:07 a.m. UTC | #2
On 04/22/2014 07:40 PM, Thierry Reding wrote:
> * PGP Signed by an unknown key
>
> On Mon, Apr 21, 2014 at 03:02:16PM +0900, Alexandre Courbot wrote:
> [...]
>> diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
> [...]
>> +		pages = dma_alloc_from_contiguous(dev, ncmin, order);
>> +		if (!pages) {
>> +			gk20a_ram_put(pfb, &mem);
>> +			return -ENOMEM;
>> +		}
>> +
>> +		dma_addr = pfn_to_dma(nv_device_base(nv_device(pfb)),
>> +				      page_to_pfn(pages));
>
> This breaks compilation on x86 because neither pfn_to_dma() nor
> dma_to_pfn() are available. Is there some other way this can be
> allocated so that these functions don't need to be called?

Mmm, this is bad. There is probably another more portable way to do 
this. Let me look for it.

Thanks,
Alex.
Alexandre Courbot April 23, 2014, 6:11 a.m. UTC | #3
On Wed, Apr 23, 2014 at 11:07 AM, Alexandre Courbot <acourbot@nvidia.com> wrote:
> On 04/22/2014 07:40 PM, Thierry Reding wrote:
>>
>> * PGP Signed by an unknown key
>>
>>
>> On Mon, Apr 21, 2014 at 03:02:16PM +0900, Alexandre Courbot wrote:
>> [...]
>>>
>>> diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
>>> b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
>>
>> [...]
>>>
>>> +               pages = dma_alloc_from_contiguous(dev, ncmin, order);
>>> +               if (!pages) {
>>> +                       gk20a_ram_put(pfb, &mem);
>>> +                       return -ENOMEM;
>>> +               }
>>> +
>>> +               dma_addr = pfn_to_dma(nv_device_base(nv_device(pfb)),
>>> +                                     page_to_pfn(pages));
>>
>>
>> This breaks compilation on x86 because neither pfn_to_dma() nor
>> dma_to_pfn() are available. Is there some other way this can be
>> allocated so that these functions don't need to be called?
>
>
> Mmm, this is bad. There is probably another more portable way to do this.
> Let me look for it.

page_to_phys()/phys_to_page() can be used by drivers and will work
just fine here since the CPU and GPU use the same physical addresses
to access memory.

Thanks,
Alex.
Thierry Reding April 28, 2014, 11:44 a.m. UTC | #4
On Wed, Apr 23, 2014 at 03:11:01PM +0900, Alexandre Courbot wrote:
> On Wed, Apr 23, 2014 at 11:07 AM, Alexandre Courbot <acourbot@nvidia.com> wrote:
> > On 04/22/2014 07:40 PM, Thierry Reding wrote:
> >>
> >> * PGP Signed by an unknown key
> >>
> >>
> >> On Mon, Apr 21, 2014 at 03:02:16PM +0900, Alexandre Courbot wrote:
> >> [...]
> >>>
> >>> diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
> >>> b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
> >>
> >> [...]
> >>>
> >>> +               pages = dma_alloc_from_contiguous(dev, ncmin, order);
> >>> +               if (!pages) {
> >>> +                       gk20a_ram_put(pfb, &mem);
> >>> +                       return -ENOMEM;
> >>> +               }
> >>> +
> >>> +               dma_addr = pfn_to_dma(nv_device_base(nv_device(pfb)),
> >>> +                                     page_to_pfn(pages));
> >>
> >>
> >> This breaks compilation on x86 because neither pfn_to_dma() nor
> >> dma_to_pfn() are available. Is there some other way this can be
> >> allocated so that these functions don't need to be called?
> >
> >
> > Mmm, this is bad. There is probably another more portable way to do this.
> > Let me look for it.
> 
> page_to_phys()/phys_to_page() can be used by drivers and will work
> just fine here since the CPU and GPU use the same physical addresses
> to access memory.

I'm wondering how this is going to pan out when we try adding IOMMU
support. But I guess we can cross that bridge when we come to it.

Thierry
Alexandre Courbot May 1, 2014, 4:49 a.m. UTC | #5
On Mon, Apr 28, 2014 at 8:44 PM, Thierry Reding
<thierry.reding@gmail.com> wrote:
> On Wed, Apr 23, 2014 at 03:11:01PM +0900, Alexandre Courbot wrote:
>> On Wed, Apr 23, 2014 at 11:07 AM, Alexandre Courbot <acourbot@nvidia.com> wrote:
>> > On 04/22/2014 07:40 PM, Thierry Reding wrote:
>> >>
>> >> * PGP Signed by an unknown key
>> >>
>> >>
>> >> On Mon, Apr 21, 2014 at 03:02:16PM +0900, Alexandre Courbot wrote:
>> >> [...]
>> >>>
>> >>> diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
>> >>> b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
>> >>
>> >> [...]
>> >>>
>> >>> +               pages = dma_alloc_from_contiguous(dev, ncmin, order);
>> >>> +               if (!pages) {
>> >>> +                       gk20a_ram_put(pfb, &mem);
>> >>> +                       return -ENOMEM;
>> >>> +               }
>> >>> +
>> >>> +               dma_addr = pfn_to_dma(nv_device_base(nv_device(pfb)),
>> >>> +                                     page_to_pfn(pages));
>> >>
>> >>
>> >> This breaks compilation on x86 because neither pfn_to_dma() nor
>> >> dma_to_pfn() are available. Is there some other way this can be
>> >> allocated so that these functions don't need to be called?
>> >
>> >
>> > Mmm, this is bad. There is probably another more portable way to do this.
>> > Let me look for it.
>>
>> page_to_phys()/phys_to_page() can be used by drivers and will work
>> just fine here since the CPU and GPU use the same physical addresses
>> to access memory.
>
> I'm wondering how this is going to pan out when we try adding IOMMU
> support. But I guess we can cross that bridge when we come to it.

Agreed. Besides I hope the day won't come where we have to go through
2 layers of memory translation for the GPU...
Terje Bergstrom May 2, 2014, 6:26 a.m. UTC | #6
On 01.05.2014 07:49, Alexandre Courbot wrote:
> Agreed. Besides I hope the day won't come where we have to go through
> 2 layers of memory translation for the GPU...

That's actually the method of operation that gives us the best
performance. GPU likes big pages, and without IOMMU translation you'd
have a hard time finding enough contiguous physical memory.

Terje
diff mbox

Patch

diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index bc5fb24630e6..34fef246e779 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -102,6 +102,7 @@  nouveau-y += core/subdev/fb/nvaa.o
 nouveau-y += core/subdev/fb/nvaf.o
 nouveau-y += core/subdev/fb/nvc0.o
 nouveau-y += core/subdev/fb/nve0.o
+nouveau-y += core/subdev/fb/gk20a.o
 nouveau-y += core/subdev/fb/gm107.o
 nouveau-y += core/subdev/fb/ramnv04.o
 nouveau-y += core/subdev/fb/ramnv10.o
@@ -117,6 +118,7 @@  nouveau-y += core/subdev/fb/ramnva3.o
 nouveau-y += core/subdev/fb/ramnvaa.o
 nouveau-y += core/subdev/fb/ramnvc0.o
 nouveau-y += core/subdev/fb/ramnve0.o
+nouveau-y += core/subdev/fb/ramgk20a.o
 nouveau-y += core/subdev/fb/ramgm107.o
 nouveau-y += core/subdev/fb/sddr3.o
 nouveau-y += core/subdev/fb/gddr5.o
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
index 58c7ccdebb01..871e73914b24 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
@@ -105,6 +105,7 @@  extern struct nouveau_oclass *nvaa_fb_oclass;
 extern struct nouveau_oclass *nvaf_fb_oclass;
 extern struct nouveau_oclass *nvc0_fb_oclass;
 extern struct nouveau_oclass *nve0_fb_oclass;
+extern struct nouveau_oclass *gk20a_fb_oclass;
 extern struct nouveau_oclass *gm107_fb_oclass;
 
 #include <subdev/bios/ramcfg.h>
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c
new file mode 100644
index 000000000000..a16024a74771
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c
@@ -0,0 +1,56 @@ 
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0.h"
+
+struct gk20a_fb_priv {	/* GK20A FB state: only wraps the generic FB object */
+	struct nouveau_fb base;
+};
+
+/* Construct the GK20A FB object: nothing beyond nouveau_fb_create() is done. */
+static int
+gk20a_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct gk20a_fb_priv *fb;
+	int err = nouveau_fb_create(parent, engine, oclass, &fb);
+
+	/* published before the status is looked at, i.e. also on failure */
+	*pobject = nv_object(fb);
+
+	/* 0 on success, otherwise the error reported by nouveau_fb_create() */
+	return err;
+}
+
+struct nouveau_oclass *
+gk20a_fb_oclass = &(struct nouveau_fb_impl) {	/* exported through its .base */
+	.base.handle = NV_SUBDEV(FB, 0xea),
+	.base.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gk20a_fb_ctor,		/* only hook defined in this file */
+		.dtor = _nouveau_fb_dtor,
+		.init = _nouveau_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+	.memtype = nvc0_fb_memtype_valid,	/* presumably declared by "nvc0.h" */
+	.ram = &gk20a_ram_oclass,		/* RAM class from ramgk20a.c */
+}.base;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
index da74c889aed4..82273f832e42 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
@@ -32,6 +32,7 @@  extern struct nouveau_oclass nva3_ram_oclass;
 extern struct nouveau_oclass nvaa_ram_oclass;
 extern struct nouveau_oclass nvc0_ram_oclass;
 extern struct nouveau_oclass nve0_ram_oclass;
+extern struct nouveau_oclass gk20a_ram_oclass;
 extern struct nouveau_oclass gm107_ram_oclass;
 
 int nouveau_sddr3_calc(struct nouveau_ram *ram);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
new file mode 100644
index 000000000000..7e9938e04f07
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
@@ -0,0 +1,168 @@ 
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+
+#include <subdev/fb.h>
+
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/dma-contiguous.h>
+
+/* Release all pages of *pmem to the contiguous allocator and free it. */
+static void
+gk20a_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
+{
+	struct device *dev = nv_device_base(nv_device(pfb));
+	struct nouveau_mem *mem = *pmem;
+	int i;
+
+	*pmem = NULL;
+
+	/* the array is zero-initialized: a null entry ends a partial fill */
+	for (i = 0; i < mem->size && mem->pages[i] != 0; i++) {
+		/* entries are physical addresses, see gk20a_ram_get() */
+		struct page *page = pfn_to_page(mem->pages[i] >> PAGE_SHIFT);
+
+		dma_release_from_contiguous(dev, page, 1);
+	}
+
+	kfree(mem->pages);
+	kfree(mem);
+}
+
+/*
+ * Allocate "size" bytes of system memory from the contiguous DMA area, in
+ * chunks of "ncmin" bytes; returns 0 with *pmem set, or -ENOMEM on failure.
+ */
+static int
+gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
+	     u32 memtype, struct nouveau_mem **pmem)
+{
+	struct device *dev = nv_device_base(nv_device(pfb));
+	struct nouveau_mem *mem;
+	int type = memtype & 0xff;
+	dma_addr_t dma_addr;
+	int npages, order, i;
+	int pos = 0;		/* next free slot in mem->pages, across chunks */
+
+	nv_debug(pfb, "%s: size: %llx align: %x, ncmin: %x\n", __func__, size,
+		 align, ncmin);
+
+	npages = size >> PAGE_SHIFT;
+	if (npages == 0)
+		npages = 1;
+
+	if (align == 0)
+		align = PAGE_SIZE;
+	align >>= PAGE_SHIFT;
+
+	/* round alignment to the next power of 2, if needed */
+	order = fls(align);
+	if ((align & (align - 1)) == 0)
+		order--;
+
+	ncmin >>= PAGE_SHIFT;
+	/*
+	 * allocate pages by chunks of "align" size, otherwise we may leave
+	 * holes in the contiguous memory area.
+	 */
+	if (ncmin == 0)
+		ncmin = npages;
+	else if (align > ncmin)
+		ncmin = align;
+
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	mem->size = npages;
+	mem->memtype = type;
+
+	mem->pages = kcalloc(npages, sizeof(*mem->pages), GFP_KERNEL);
+	if (!mem->pages) {
+		kfree(mem);
+		return -ENOMEM;
+	}
+
+	while (npages) {
+		struct page *pages;
+
+		/* don't overflow in case size is not a multiple of ncmin */
+		if (ncmin > npages)
+			ncmin = npages;
+
+		pages = dma_alloc_from_contiguous(dev, ncmin, order);
+		if (!pages) {
+			gk20a_ram_put(pfb, &mem);
+			return -ENOMEM;
+		}
+
+		/* CPU and GPU share physical addresses: no bus translation */
+		dma_addr = (dma_addr_t)page_to_pfn(pages) << PAGE_SHIFT;
+
+		nv_debug(pfb, "  alloc count: %x, order: %x, addr: %llx\n",
+			 ncmin, order, (u64)dma_addr);
+
+		for (i = 0; i < ncmin; i++)
+			mem->pages[pos + i] = dma_addr + (PAGE_SIZE * i);
+
+		pos += ncmin;
+		npages -= ncmin;
+	}
+
+	mem->offset = (u64)mem->pages[0];
+	*pmem = mem;
+
+	return 0;
+}
+
+/* Construct the GK20A RAM object: system memory serves as video RAM. */
+static int
+gk20a_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	      struct nouveau_oclass *oclass, void *data, u32 datasize,
+	      struct nouveau_object **pobject)
+{
+	struct nouveau_ram *ram;
+	int ret;
+
+	ret = nouveau_ram_create(parent, engine, oclass, &ram);
+	*pobject = nv_object(ram);
+	if (ret)
+		return ret;
+	ram->type = NV_MEM_TYPE_STOLEN;
+	/* widen before shifting: unsigned long wraps on 32-bit at >= 4GiB */
+	ram->size = (u64)get_num_physpages() << PAGE_SHIFT;
+	ram->get = gk20a_ram_get;
+	ram->put = gk20a_ram_put;
+	return 0;
+}
+
+struct nouveau_oclass
+gk20a_ram_oclass = {			/* referenced by gk20a_fb_oclass.ram */
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gk20a_ram_ctor,	/* installs gk20a_ram_get/put */
+		.dtor = _nouveau_ram_dtor,
+		.init = _nouveau_ram_init,
+		.fini = _nouveau_ram_fini,
+	},
+};