diff mbox

[kvm-unit-tests,11/14] pci: edu: introduce pci-edu helpers

Message ID 1476448852-30062-12-git-send-email-peterx@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Peter Xu Oct. 14, 2016, 12:40 p.m. UTC
QEMU edu device is a pci device that is originally written for
educational purpose, however it also suits for IOMMU unit test. Adding
helpers for this specific device to implement the device logic.

The device supports lots of functions, here only DMA operation is
supported.

The spec of the device can be found at:

  https://github.com/qemu/qemu/blob/master/docs/specs/edu.txt

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 lib/pci-edu.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/pci-edu.h |  29 +++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 lib/pci-edu.c
 create mode 100644 lib/pci-edu.h

Comments

Andrew Jones Oct. 20, 2016, 1:19 p.m. UTC | #1
On Fri, Oct 14, 2016 at 08:40:49PM +0800, Peter Xu wrote:
> QEMU edu device is a pci device that is originally written for
> educational purpose, however it also suits for IOMMU unit test. Adding
> helpers for this specific device to implement the device logic.
> 
> The device supports lots of functions, here only DMA operation is
> supported.
> 
> The spec of the device can be found at:
> 
>   https://github.com/qemu/qemu/blob/master/docs/specs/edu.txt
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  lib/pci-edu.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/pci-edu.h |  29 +++++++++++++
>  2 files changed, 157 insertions(+)
>  create mode 100644 lib/pci-edu.c
>  create mode 100644 lib/pci-edu.h
> 
> diff --git a/lib/pci-edu.c b/lib/pci-edu.c
> new file mode 100644
> index 0000000..4d1a5ab
> --- /dev/null
> +++ b/lib/pci-edu.c
> @@ -0,0 +1,128 @@
> +/*
> + * Edu PCI device.
> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + */
> +
> +#include "pci-edu.h"
> +
> +#define  PCI_VENDOR_ID_QEMU              (0x1234)
> +#define  PCI_DEVICE_ID_EDU               (0x11e8)
> +
> +/* The only bar used by EDU device */
> +#define EDU_BAR_MEM                 (0)
> +#define EDU_MAGIC                   (0xed)
> +#define EDU_VERSION                 (0x100)
> +#define EDU_DMA_BUF_SIZE            (1 << 20)
> +#define EDU_INPUT_BUF_SIZE          (256)
> +
> +#define EDU_REG_ID                  (0x0)
> +#define EDU_REG_ALIVE               (0x4)
> +#define EDU_REG_FACTORIAL           (0x8)
> +#define EDU_REG_STATUS              (0x20)
> +#define EDU_REG_DMA_SRC             (0x80)
> +#define EDU_REG_DMA_DST             (0x88)
> +#define EDU_REG_DMA_COUNT           (0x90)
> +#define EDU_REG_DMA_CMD             (0x98)
> +
> +#define EDU_CMD_DMA_START           (0x01)
> +#define EDU_CMD_DMA_FROM            (0x02)
> +#define EDU_CMD_DMA_TO              (0x00)
> +
> +#define EDU_STATUS_FACTORIAL        (0x1)
> +#define EDU_STATUS_INT_ENABLE       (0x80)
> +
> +#define EDU_DMA_START               (0x40000)
> +#define EDU_DMA_SIZE_MAX            (4096)

shouldn't the above defines be in the header?

> +
> +uint64_t edu_reg_readq(pci_edu_dev_t *dev, int reg)
> +{
> +	return *(volatile uint64_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg);
> +}
> +
> +uint32_t edu_reg_read(pci_edu_dev_t *dev, int reg)
> +{
> +	return *(volatile uint32_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg);
> +}
> +
> +void edu_reg_writeq(pci_edu_dev_t *dev, int reg, uint64_t val)
> +{
> +	*(volatile uint64_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg) = val;
> +}
> +
> +void edu_reg_write(pci_edu_dev_t *dev, int reg, uint32_t val)
> +{
> +	*(volatile uint32_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg) = val;
> +}

I'd put these accessors in the header as static inlines too

> +
> +/* Return true if alive */
> +bool edu_check_alive(pci_edu_dev_t *dev)
> +{
> +	static uint32_t live_count = 1;
> +	uint32_t value;
> +
> +	edu_reg_write(dev, EDU_REG_ALIVE, live_count++);
> +	value = edu_reg_read(dev, EDU_REG_ALIVE);
> +	return (live_count - 1 == ~value);
> +}

Even edu_check_alive could be a static inline in the header,
if it's something you'll do frequently. If it's only needed
for a sanity init test then I'd just inline it directly in
the one caller, edu_init

> +
> +int edu_init(pci_edu_dev_t *dev)
> +{
> +	int ret;
> +
> +	ret = pci_dev_init(&dev->pci_dev, PCI_VENDOR_ID_QEMU,
> +			   PCI_DEVICE_ID_EDU);
> +	if (ret)
> +		return ret;
> +
> +	if (!edu_check_alive(dev)) {
> +		printf("edu device not alive!\n");
> +		return -1;

should this ever fail? Or would an assert by fine here?
 alive = edu_check_alive(dev)
 assert(alive);

> +	}
> +
> +	return 0;
> +}
> +
> +uint32_t edu_status(pci_edu_dev_t *dev)
> +{
> +	return edu_reg_read(dev, EDU_REG_STATUS);
> +}

please, no unnecessary wrappers

> +
> +void edu_dma(pci_edu_dev_t *dev, iova_t iova,
> +	     size_t size, int dev_offset, pci_dma_dir_t dir)
> +{
> +	uint64_t from, to;
> +	uint32_t cmd = EDU_CMD_DMA_START;
> +
> +	assert(size <= EDU_DMA_SIZE_MAX);
> +	assert(dev_offset < EDU_DMA_SIZE_MAX &&
> +	       dev_offset >= 0);
> +
> +	printf("edu device DMA start %s addr %p size 0x%lu off 0x%x\n",
> +	       dir == PCI_DMA_FROM_DEVICE ? "FROM" : "TO",
> +	       (void *)iova, size, dev_offset);

is pci_dma_dir_t just a binary enum? If so, then I find it
sort of crufty. Can't we just have a 'bool write'?

> +
> +	if (dir == PCI_DMA_FROM_DEVICE) {
> +		from = dev_offset + EDU_DMA_START;
> +		to = iova;
> +		cmd |= EDU_CMD_DMA_FROM;
> +	} else {
> +		from = iova;
> +		to = EDU_DMA_START + dev_offset;
> +		cmd |= EDU_CMD_DMA_TO;
> +	}
> +
> +	edu_reg_writeq(dev, EDU_REG_DMA_SRC, from);
> +	edu_reg_writeq(dev, EDU_REG_DMA_DST, to);
> +	edu_reg_writeq(dev, EDU_REG_DMA_COUNT, size);
> +	edu_reg_write(dev, EDU_REG_DMA_CMD, cmd);
> +
> +	/* Wait until DMA finished */
> +	while (edu_reg_read(dev, EDU_REG_DMA_CMD) & EDU_CMD_DMA_START);

You may consider adding a cpu_relax() to lib/x86/asm/barrier.h, like
arm and ppc have. I noticed a big difference when running with tcg
on how fast a 'while (state-not-changed);' loop can complete when
adding that barrier.

> +}
> diff --git a/lib/pci-edu.h b/lib/pci-edu.h
> new file mode 100644
> index 0000000..6b7dbfd
> --- /dev/null
> +++ b/lib/pci-edu.h
> @@ -0,0 +1,29 @@
> +/*
> + * Edu PCI device header.
> + *
> + * Copyright (C) 2016 Red Hat, Inc.
> + *
> + * Authors:
> + *   Peter Xu <peterx@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or
> + * later.
> + *
> + * Edu device is a virtualized device in QEMU. Please refer to
> + * docs/specs/edu.txt in QEMU repository for EDU device manual.
> + */
> +#ifndef __PCI_EDU_H__
> +#define __PCI_EDU_H__
> +
> +#include "pci.h"
> +
> +struct pci_edu_dev {
> +	pci_dev pci_dev;
> +};
> +typedef struct pci_edu_dev pci_edu_dev_t;

Why wrap pci_dev in this pci_edu_dev struct? Will there ever
be more state? Should we just wait and see? Also, why use a
typedef for pci_dev (assuming we want it)? 'struct pci_dev'
would be more kernel like.

Thanks,
drew

> +
> +int edu_init(pci_edu_dev_t *dev);
> +void edu_dma(pci_edu_dev_t *dev, iova_t iova,
> +	     size_t size, int dev_offset, pci_dma_dir_t dir);
> +
> +#endif
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Xu Oct. 25, 2016, 3:34 a.m. UTC | #2
On Thu, Oct 20, 2016 at 03:19:28PM +0200, Andrew Jones wrote:

[...]

> > +#include "pci-edu.h"
> > +
> > +#define  PCI_VENDOR_ID_QEMU              (0x1234)
> > +#define  PCI_DEVICE_ID_EDU               (0x11e8)
> > +
> > +/* The only bar used by EDU device */
> > +#define EDU_BAR_MEM                 (0)
> > +#define EDU_MAGIC                   (0xed)
> > +#define EDU_VERSION                 (0x100)
> > +#define EDU_DMA_BUF_SIZE            (1 << 20)
> > +#define EDU_INPUT_BUF_SIZE          (256)
> > +
> > +#define EDU_REG_ID                  (0x0)
> > +#define EDU_REG_ALIVE               (0x4)
> > +#define EDU_REG_FACTORIAL           (0x8)
> > +#define EDU_REG_STATUS              (0x20)
> > +#define EDU_REG_DMA_SRC             (0x80)
> > +#define EDU_REG_DMA_DST             (0x88)
> > +#define EDU_REG_DMA_COUNT           (0x90)
> > +#define EDU_REG_DMA_CMD             (0x98)
> > +
> > +#define EDU_CMD_DMA_START           (0x01)
> > +#define EDU_CMD_DMA_FROM            (0x02)
> > +#define EDU_CMD_DMA_TO              (0x00)
> > +
> > +#define EDU_STATUS_FACTORIAL        (0x1)
> > +#define EDU_STATUS_INT_ENABLE       (0x80)
> > +
> > +#define EDU_DMA_START               (0x40000)
> > +#define EDU_DMA_SIZE_MAX            (4096)
> 
> shouldn't the above defines be in the header?

I put them here since these are macros that I think will only be used
by this file only. However I think it'll still be okay to move them
into header files, so I'll do that.

> 
> > +
> > +uint64_t edu_reg_readq(pci_edu_dev_t *dev, int reg)
> > +{
> > +	return *(volatile uint64_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg);
> > +}
> > +
> > +uint32_t edu_reg_read(pci_edu_dev_t *dev, int reg)
> > +{
> > +	return *(volatile uint32_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg);
> > +}
> > +
> > +void edu_reg_writeq(pci_edu_dev_t *dev, int reg, uint64_t val)
> > +{
> > +	*(volatile uint64_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg) = val;
> > +}
> > +
> > +void edu_reg_write(pci_edu_dev_t *dev, int reg, uint32_t val)
> > +{
> > +	*(volatile uint32_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg) = val;
> > +}
> 
> I'd put these accessors in the header as static inlines too

Sure. Will do.

> 
> > +
> > +/* Return true if alive */
> > +bool edu_check_alive(pci_edu_dev_t *dev)
> > +{
> > +	static uint32_t live_count = 1;
> > +	uint32_t value;
> > +
> > +	edu_reg_write(dev, EDU_REG_ALIVE, live_count++);
> > +	value = edu_reg_read(dev, EDU_REG_ALIVE);
> > +	return (live_count - 1 == ~value);
> > +}
> 
> Even edu_check_alive could be a static inline in the header,
> if it's something you'll do frequently. If it's only needed
> for a sanity init test then I'd just inline it directly in
> the one caller, edu_init

Will do.

> 
> > +
> > +int edu_init(pci_edu_dev_t *dev)
> > +{
> > +	int ret;
> > +
> > +	ret = pci_dev_init(&dev->pci_dev, PCI_VENDOR_ID_QEMU,
> > +			   PCI_DEVICE_ID_EDU);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (!edu_check_alive(dev)) {
> > +		printf("edu device not alive!\n");
> > +		return -1;
> 
> should this ever fail? Or would an assert by fine here?
>  alive = edu_check_alive(dev)
>  assert(alive);

Yep. It should not fail. Will take assert.

> 
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +uint32_t edu_status(pci_edu_dev_t *dev)
> > +{
> > +	return edu_reg_read(dev, EDU_REG_STATUS);
> > +}
> 
> please, no unnecessary wrappers

Ok.

> 
> > +
> > +void edu_dma(pci_edu_dev_t *dev, iova_t iova,
> > +	     size_t size, int dev_offset, pci_dma_dir_t dir)
> > +{
> > +	uint64_t from, to;
> > +	uint32_t cmd = EDU_CMD_DMA_START;
> > +
> > +	assert(size <= EDU_DMA_SIZE_MAX);
> > +	assert(dev_offset < EDU_DMA_SIZE_MAX &&
> > +	       dev_offset >= 0);
> > +
> > +	printf("edu device DMA start %s addr %p size 0x%lu off 0x%x\n",
> > +	       dir == PCI_DMA_FROM_DEVICE ? "FROM" : "TO",
> > +	       (void *)iova, size, dev_offset);
> 
> is pci_dma_dir_t just a binary enum? If so, then I find it
> sort of crufty. Can't we just have a 'bool write'?

Will replace with "bool from_device".

> 
> > +
> > +	if (dir == PCI_DMA_FROM_DEVICE) {
> > +		from = dev_offset + EDU_DMA_START;
> > +		to = iova;
> > +		cmd |= EDU_CMD_DMA_FROM;
> > +	} else {
> > +		from = iova;
> > +		to = EDU_DMA_START + dev_offset;
> > +		cmd |= EDU_CMD_DMA_TO;
> > +	}
> > +
> > +	edu_reg_writeq(dev, EDU_REG_DMA_SRC, from);
> > +	edu_reg_writeq(dev, EDU_REG_DMA_DST, to);
> > +	edu_reg_writeq(dev, EDU_REG_DMA_COUNT, size);
> > +	edu_reg_write(dev, EDU_REG_DMA_CMD, cmd);
> > +
> > +	/* Wait until DMA finished */
> > +	while (edu_reg_read(dev, EDU_REG_DMA_CMD) & EDU_CMD_DMA_START);
> 
> You may consider adding a cpu_relax() to lib/x86/asm/barrier.h, like
> arm and ppc have. I noticed a big difference when running with tcg
> on how fast a 'while (state-not-changed);' loop can complete when
> adding that barrier.

Should I just do:

 #include <asm-generic/barrier.h>

in x86/asm/barrier.h, just like ppc?

Btw, could you elaborate what would be the difference? I see
cpu_relax() is defined as:

 #define cpu_relax()	asm volatile(""    : : : "memory")

Then how would the two differ:

(1) while (xxx);
(2) while (xxx) cpu_relax();

(I thought they are still the same? maybe I should assume variables in
 xxx should have volatile keywords)

> 
> > +}
> > diff --git a/lib/pci-edu.h b/lib/pci-edu.h
> > new file mode 100644
> > index 0000000..6b7dbfd
> > --- /dev/null
> > +++ b/lib/pci-edu.h
> > @@ -0,0 +1,29 @@
> > +/*
> > + * Edu PCI device header.
> > + *
> > + * Copyright (C) 2016 Red Hat, Inc.
> > + *
> > + * Authors:
> > + *   Peter Xu <peterx@redhat.com>,
> > + *
> > + * This work is licensed under the terms of the GNU LGPL, version 2 or
> > + * later.
> > + *
> > + * Edu device is a virtualized device in QEMU. Please refer to
> > + * docs/specs/edu.txt in QEMU repository for EDU device manual.
> > + */
> > +#ifndef __PCI_EDU_H__
> > +#define __PCI_EDU_H__
> > +
> > +#include "pci.h"
> > +
> > +struct pci_edu_dev {
> > +	pci_dev pci_dev;
> > +};
> > +typedef struct pci_edu_dev pci_edu_dev_t;
> 
> Why wrap pci_dev in this pci_edu_dev struct? Will there ever
> be more state? Should we just wait and see? Also, why use a
> typedef for pci_dev (assuming we want it)? 'struct pci_dev'
> would be more kernel like.

My own preference is to use typedef just like how QEMU is using it, to
avoid typing "struct", and with more flexibility (e.g., I can just
change a struct into union without touching other part of code, as
long as I keep all the existing fields' name there). However I think I
should follow how kvm-unit-tests/Linux's coding style to use struct.

But should I still keep the wrapper even if there is only one field
which is "struct pci_dev"? Benefits:

(1) people won't be able to edu_dma() a PCI device which is not a edu
    device, or say "struct pci_edu_dev" type is checked during
    compilation.

(2) in case edu device will need more fields, we won't need to touch
    everywhere and change the old "struct pci_dev" into "struct
    pci_edu_dev".

Thanks!

-- peterx
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrew Jones Oct. 25, 2016, 10:43 a.m. UTC | #3
On Tue, Oct 25, 2016 at 11:34:22AM +0800, Peter Xu wrote:
> > You may consider adding a cpu_relax() to lib/x86/asm/barrier.h, like
> > arm and ppc have. I noticed a big difference when running with tcg
> > on how fast a 'while (state-not-changed);' loop can complete when
> > adding that barrier.
> 
> Should I just do:
> 
>  #include <asm-generic/barrier.h>
> 
> in x86/asm/barrier.h, just like ppc?

Nope. See below

> 
> Btw, could you elaborate what would be the difference? I see
> cpu_relax() is defined as:
> 
>  #define cpu_relax()	asm volatile(""    : : : "memory")
> 
> Then how would the two differ:
> 
> (1) while (xxx);
> (2) while (xxx) cpu_relax();

It inserts a compiler-generated memory barrier, which for ARM TCG
helps relinquish some time to QEMU, allowing QEMU to do what it
needs to do in order for the condition to be fulfilled. I just
checked Linux code for x86's definition of cpu_relax(), though, and
see that it's

 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 static __always_inline void rep_nop(void)
 {
    asm volatile("rep; nop" ::: "memory");
 }

 static __always_inline void cpu_relax(void)
 {
    rep_nop();
 }

So that may work better for you.

> 
> (I thought they are still the same? maybe I should assume variables in
>  xxx should have volatile keywords)

Don't assume they do, make sure they do. Without volatile for those
variables you may never stop looping.

> 
> > 
> > > +}
> > > diff --git a/lib/pci-edu.h b/lib/pci-edu.h
> > > new file mode 100644
> > > index 0000000..6b7dbfd
> > > --- /dev/null
> > > +++ b/lib/pci-edu.h
> > > @@ -0,0 +1,29 @@
> > > +/*
> > > + * Edu PCI device header.
> > > + *
> > > + * Copyright (C) 2016 Red Hat, Inc.
> > > + *
> > > + * Authors:
> > > + *   Peter Xu <peterx@redhat.com>,
> > > + *
> > > + * This work is licensed under the terms of the GNU LGPL, version 2 or
> > > + * later.
> > > + *
> > > + * Edu device is a virtualized device in QEMU. Please refer to
> > > + * docs/specs/edu.txt in QEMU repository for EDU device manual.
> > > + */
> > > +#ifndef __PCI_EDU_H__
> > > +#define __PCI_EDU_H__
> > > +
> > > +#include "pci.h"
> > > +
> > > +struct pci_edu_dev {
> > > +	pci_dev pci_dev;
> > > +};
> > > +typedef struct pci_edu_dev pci_edu_dev_t;
> > 
> > Why wrap pci_dev in this pci_edu_dev struct? Will there ever
> > be more state? Should we just wait and see? Also, why use a
> > typedef for pci_dev (assuming we want it)? 'struct pci_dev'
> > would be more kernel like.
> 
> My own preference is to use typedef just like how QEMU is using it, to
> avoid typing "struct", and with more flexibility (e.g., I can just
> change a struct into union without touching other part of code, as
> long as I keep all the existing fields' name there). However I think I
> should follow how kvm-unit-tests/Linux's coding style to use struct.

I would. Note, the kernel tends to wrap unions in structs, avoiding
a need to switch the outer type.

> 
> But should I still keep the wrapper even if there is only one field
> which is "struct pci_dev"? Benefits:
> 
> (1) people won't be able to edu_dma() a PCI device which is not a edu
>     device, or say "struct pci_edu_dev" type is checked during
>     compilation.
> 
> (2) in case edu device will need more fields, we won't need to touch
>     everywhere and change the old "struct pci_dev" into "struct
>     pci_edu_dev".

Fair enough, but I think we need to decide if we even need struct
pci_dev first :-) There's no need to introduce any structs if all
we really need to do is pass around devid.

Thanks,
drew
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Xu Oct. 25, 2016, 11:33 a.m. UTC | #4
On Tue, Oct 25, 2016 at 12:43:41PM +0200, Andrew Jones wrote:
> On Tue, Oct 25, 2016 at 11:34:22AM +0800, Peter Xu wrote:
> > > You may consider adding a cpu_relax() to lib/x86/asm/barrier.h, like
> > > arm and ppc have. I noticed a big difference when running with tcg
> > > on how fast a 'while (state-not-changed);' loop can complete when
> > > adding that barrier.
> > 
> > Should I just do:
> > 
> >  #include <asm-generic/barrier.h>
> > 
> > in x86/asm/barrier.h, just like ppc?
> 
> Nope. See below
> 
> > 
> > Btw, could you elaborate what would be the difference? I see
> > cpu_relax() is defined as:
> > 
> >  #define cpu_relax()	asm volatile(""    : : : "memory")
> > 
> > Then how would the two differ:
> > 
> > (1) while (xxx);
> > (2) while (xxx) cpu_relax();
> 
> It inserts a compiler-generated memory barrier, which for ARM TCG
> helps relinquish some time to QEMU, allowing QEMU to do what it
> needs to do in order for the condition to be fulfilled. I just
> checked Linux code for x86's definition of cpu_relax(), though, and
> see that it's
> 
>  /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
>  static __always_inline void rep_nop(void)
>  {
>     asm volatile("rep; nop" ::: "memory");
>  }
> 
>  static __always_inline void cpu_relax(void)
>  {
>     rep_nop();
>  }
> 
> So that may work better for you.

A lesson for cpu_relax(). Thanks. :-)

So I guess "rep; nop" is a arch-specific hint for x86. Will add one
more patch for it.

> 
> > 
> > (I thought they are still the same? maybe I should assume variables in
> >  xxx should have volatile keywords)
> 
> Don't assume they do, make sure they do. Without volatile for those
> variables you may never stop looping.

Noted.

[...]

> > > Why wrap pci_dev in this pci_edu_dev struct? Will there ever
> > > be more state? Should we just wait and see? Also, why use a
> > > typedef for pci_dev (assuming we want it)? 'struct pci_dev'
> > > would be more kernel like.
> > 
> > My own preference is to use typedef just like how QEMU is using it, to
> > avoid typing "struct", and with more flexibility (e.g., I can just
> > change a struct into union without touching other part of code, as
> > long as I keep all the existing fields' name there). However I think I
> > should follow how kvm-unit-tests/Linux's coding style to use struct.
> 
> I would. Note, the kernel tends to wrap unions in structs, avoiding
> a need to switch the outer type.

Hmm... right.

> 
> > 
> > But should I still keep the wrapper even if there is only one field
> > which is "struct pci_dev"? Benefits:
> > 
> > (1) people won't be able to edu_dma() a PCI device which is not a edu
> >     device, or say "struct pci_edu_dev" type is checked during
> >     compilation.
> > 
> > (2) in case edu device will need more fields, we won't need to touch
> >     everywhere and change the old "struct pci_dev" into "struct
> >     pci_edu_dev".
> 
> Fair enough, but I think we need to decide if we even need struct
> pci_dev first :-) There's no need to introduce any structs if all
> we really need to do is pass around devid.

I think that may depend on how we target kvm-unit-tests, and how we
will try to extend PCI functions in kvm-unit-tests in the future. For
now, I needed it mostly because of MSI offset. For PCI bar, indeed we
can fetch it everytime we need it. However for MSI offset, it's hidden
inside capability list. If we don't cache it, we will need to traverse
the capability list every time, which is really not that efficient.

If one day we want to play with MSI-X? Or more? If we have such a
requirement, it seems just natural to have a "struct pci_dev" and keep
all the device specific information along with the object. :-)

Thanks!

-- peterx
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/lib/pci-edu.c b/lib/pci-edu.c
new file mode 100644
index 0000000..4d1a5ab
--- /dev/null
+++ b/lib/pci-edu.c
@@ -0,0 +1,128 @@ 
+/*
+ * Edu PCI device.
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ */
+
+#include "pci-edu.h"
+
+#define  PCI_VENDOR_ID_QEMU              (0x1234)
+#define  PCI_DEVICE_ID_EDU               (0x11e8)
+
+/* The only bar used by EDU device */
+#define EDU_BAR_MEM                 (0)
+#define EDU_MAGIC                   (0xed)
+#define EDU_VERSION                 (0x100)
+#define EDU_DMA_BUF_SIZE            (1 << 20)
+#define EDU_INPUT_BUF_SIZE          (256)
+
+#define EDU_REG_ID                  (0x0)
+#define EDU_REG_ALIVE               (0x4)
+#define EDU_REG_FACTORIAL           (0x8)
+#define EDU_REG_STATUS              (0x20)
+#define EDU_REG_DMA_SRC             (0x80)
+#define EDU_REG_DMA_DST             (0x88)
+#define EDU_REG_DMA_COUNT           (0x90)
+#define EDU_REG_DMA_CMD             (0x98)
+
+#define EDU_CMD_DMA_START           (0x01)
+#define EDU_CMD_DMA_FROM            (0x02)
+#define EDU_CMD_DMA_TO              (0x00)
+
+#define EDU_STATUS_FACTORIAL        (0x1)
+#define EDU_STATUS_INT_ENABLE       (0x80)
+
+#define EDU_DMA_START               (0x40000)
+#define EDU_DMA_SIZE_MAX            (4096)
+
+uint64_t edu_reg_readq(pci_edu_dev_t *dev, int reg)
+{
+	return *(volatile uint64_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg);
+}
+
+uint32_t edu_reg_read(pci_edu_dev_t *dev, int reg)
+{
+	return *(volatile uint32_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg);
+}
+
+void edu_reg_writeq(pci_edu_dev_t *dev, int reg, uint64_t val)
+{
+	*(volatile uint64_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg) = val;
+}
+
+void edu_reg_write(pci_edu_dev_t *dev, int reg, uint32_t val)
+{
+	*(volatile uint32_t *)(dev->pci_dev.pci_bar[EDU_BAR_MEM] + reg) = val;
+}
+
+/* Return true if alive */
+bool edu_check_alive(pci_edu_dev_t *dev)
+{
+	static uint32_t live_count = 1;
+	uint32_t value;
+
+	edu_reg_write(dev, EDU_REG_ALIVE, live_count++);
+	value = edu_reg_read(dev, EDU_REG_ALIVE);
+	return (live_count - 1 == ~value);
+}
+
+int edu_init(pci_edu_dev_t *dev)
+{
+	int ret;
+
+	ret = pci_dev_init(&dev->pci_dev, PCI_VENDOR_ID_QEMU,
+			   PCI_DEVICE_ID_EDU);
+	if (ret)
+		return ret;
+
+	if (!edu_check_alive(dev)) {
+		printf("edu device not alive!\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+uint32_t edu_status(pci_edu_dev_t *dev)
+{
+	return edu_reg_read(dev, EDU_REG_STATUS);
+}
+
+void edu_dma(pci_edu_dev_t *dev, iova_t iova,
+	     size_t size, int dev_offset, pci_dma_dir_t dir)
+{
+	uint64_t from, to;
+	uint32_t cmd = EDU_CMD_DMA_START;
+
+	assert(size <= EDU_DMA_SIZE_MAX);
+	assert(dev_offset < EDU_DMA_SIZE_MAX &&
+	       dev_offset >= 0);
+
+	printf("edu device DMA start %s addr %p size 0x%lu off 0x%x\n",
+	       dir == PCI_DMA_FROM_DEVICE ? "FROM" : "TO",
+	       (void *)iova, size, dev_offset);
+
+	if (dir == PCI_DMA_FROM_DEVICE) {
+		from = dev_offset + EDU_DMA_START;
+		to = iova;
+		cmd |= EDU_CMD_DMA_FROM;
+	} else {
+		from = iova;
+		to = EDU_DMA_START + dev_offset;
+		cmd |= EDU_CMD_DMA_TO;
+	}
+
+	edu_reg_writeq(dev, EDU_REG_DMA_SRC, from);
+	edu_reg_writeq(dev, EDU_REG_DMA_DST, to);
+	edu_reg_writeq(dev, EDU_REG_DMA_COUNT, size);
+	edu_reg_write(dev, EDU_REG_DMA_CMD, cmd);
+
+	/* Wait until DMA finished */
+	while (edu_reg_read(dev, EDU_REG_DMA_CMD) & EDU_CMD_DMA_START);
+}
diff --git a/lib/pci-edu.h b/lib/pci-edu.h
new file mode 100644
index 0000000..6b7dbfd
--- /dev/null
+++ b/lib/pci-edu.h
@@ -0,0 +1,29 @@ 
+/*
+ * Edu PCI device header.
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *   Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.
+ *
+ * Edu device is a virtualized device in QEMU. Please refer to
+ * docs/specs/edu.txt in QEMU repository for EDU device manual.
+ */
+#ifndef __PCI_EDU_H__
+#define __PCI_EDU_H__
+
+#include "pci.h"
+
+struct pci_edu_dev {
+	pci_dev pci_dev;
+};
+typedef struct pci_edu_dev pci_edu_dev_t;
+
+int edu_init(pci_edu_dev_t *dev);
+void edu_dma(pci_edu_dev_t *dev, iova_t iova,
+	     size_t size, int dev_offset, pci_dma_dir_t dir);
+
+#endif