diff mbox series

[v1,04/15] nitro_enclaves: Init PCI device driver

Message ID 20200421184150.68011-5-andraprs@amazon.com (mailing list archive)
State New, archived
Headers show
Series Add support for Nitro Enclaves | expand

Commit Message

Paraschiv, Andra-Irina April 21, 2020, 6:41 p.m. UTC
The Nitro Enclaves PCI device is used by the kernel driver as a means of
communication with the hypervisor on the host where the primary VM and
the enclaves run. It handles requests with regard to enclave lifetime.

Setup the PCI device driver and add support for MSI-X interrupts.

Signed-off-by: Alexandru-Catalin Vasile <lexnv@amazon.com>
Signed-off-by: Alexandru Ciobotaru <alcioa@amazon.com>
Signed-off-by: Andra Paraschiv <andraprs@amazon.com>
---
 .../virt/amazon/nitro_enclaves/ne_pci_dev.c   | 278 ++++++++++++++++++
 1 file changed, 278 insertions(+)
 create mode 100644 drivers/virt/amazon/nitro_enclaves/ne_pci_dev.c

Comments

Liran Alon April 25, 2020, 2:25 p.m. UTC | #1
On 21/04/2020 21:41, Andra Paraschiv wrote:
> +
> +/**
> + * ne_setup_msix - Setup MSI-X vectors for the PCI device.
> + *
> + * @pdev: PCI device to setup the MSI-X for.
> + * @ne_pci_dev: PCI device private data structure.
> + *
> + * @returns: 0 on success, negative return value on failure.
> + */
> +static int ne_setup_msix(struct pci_dev *pdev, struct ne_pci_dev *ne_pci_dev)
> +{
> +	int nr_vecs = 0;
> +	int rc = -EINVAL;
> +
> +	BUG_ON(!ne_pci_dev);
This kind of defensive programming does not align with Linux coding 
convention.
I think these BUG_ON() conditions should be removed.
> +
> +	nr_vecs = pci_msix_vec_count(pdev);
> +	if (nr_vecs < 0) {
> +		rc = nr_vecs;
> +
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in getting vec count [rc=%d]\n",
> +				    rc);
> +
> +		return rc;
> +	}
> +
> +	rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
> +	if (rc < 0) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in alloc MSI-X vecs [rc=%d]\n",
> +				    rc);
> +
> +		goto err_alloc_irq_vecs;
You should just replace this with "return rc;" as no cleanup is required 
here.
> +	}
> +
> +	return 0;
> +
> +err_alloc_irq_vecs:
> +	return rc;
> +}
> +
> +/**
> + * ne_pci_dev_enable - Select PCI device version and enable it.
> + *
> + * @pdev: PCI device to select version for and then enable.
> + * @ne_pci_dev: PCI device private data structure.
> + *
> + * @returns: 0 on success, negative return value on failure.
> + */
> +static int ne_pci_dev_enable(struct pci_dev *pdev,
> +			     struct ne_pci_dev *ne_pci_dev)
> +{
> +	u8 dev_enable_reply = 0;
> +	u16 dev_version_reply = 0;
> +
> +	BUG_ON(!pdev);
> +	BUG_ON(!ne_pci_dev);
> +	BUG_ON(!ne_pci_dev->iomem_base);
Same.
> +
> +	iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
> +
> +	dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
> +	if (dev_version_reply != NE_VERSION_MAX) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in pci dev version cmd\n");
> +
> +		return -EIO;
> +	}
> +
> +	iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
> +
> +	dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
> +	if (dev_enable_reply != NE_ENABLE_ON) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in pci dev enable cmd\n");
> +
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * ne_pci_dev_disable - Disable PCI device.
> + *
> + * @pdev: PCI device to disable.
> + * @ne_pci_dev: PCI device private data structure.
> + *
> + * @returns: 0 on success, negative return value on failure.
> + */
> +static int ne_pci_dev_disable(struct pci_dev *pdev,
> +			      struct ne_pci_dev *ne_pci_dev)
> +{
> +	u8 dev_disable_reply = 0;
> +
> +	BUG_ON(!pdev);
> +	BUG_ON(!ne_pci_dev);
> +	BUG_ON(!ne_pci_dev->iomem_base);
Same.
> +
> +	iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
> +
> +	/*
> +	 * TODO: Check for NE_ENABLE_OFF in a loop, to handle cases when the
> +	 * device state is not immediately set to disabled and going through a
> +	 * transitory state of disabling.
> +	 */
> +	dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
> +	if (dev_disable_reply != NE_ENABLE_OFF) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in pci dev disable cmd\n");
> +
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ne_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> +{
> +	struct ne_pci_dev *ne_pci_dev = NULL;
> +	int rc = -EINVAL;
Unnecessary variable initialization.
ne_pci_dev and rc are initialized below always before they are used.
> +
> +	ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
> +	if (!ne_pci_dev)
> +		return -ENOMEM;
> +
> +	rc = pci_enable_device(pdev);
> +	if (rc < 0) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in pci dev enable [rc=%d]\n", rc);
> +
Why is this dev_err_ratelimited() instead of dev_err()?
Same for the rest of error printing in this probe() method and other 
places in this patch.
> +		goto err_pci_enable_dev;
I find it confusing that the error labels are named based on the 
failure-case they are used,
instead of the action they do (i.e. Unwind previous successful operation 
that requires unwinding).
This doesn't seem to match Linux kernel coding convention.
It also created an unnecessary 2 labels pointing to the same place in 
cleanup code.
> +	}
> +
> +	rc = pci_request_regions_exclusive(pdev, "ne_pci_dev");
> +	if (rc < 0) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in pci request regions [rc=%d]\n",
> +				    rc);
> +
> +		goto err_req_regions;
> +	}
> +
> +	ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
> +	if (!ne_pci_dev->iomem_base) {
> +		rc = -ENOMEM;
> +
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in pci bar mapping [rc=%d]\n", rc);
> +
> +		goto err_iomap;
> +	}
> +
> +	rc = ne_setup_msix(pdev, ne_pci_dev);
> +	if (rc < 0) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in pci dev msix setup [rc=%d]\n",
> +				    rc);
> +
> +		goto err_setup_msix;
> +	}
> +
> +	rc = ne_pci_dev_disable(pdev, ne_pci_dev);
> +	if (rc < 0) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in ne_pci_dev disable [rc=%d]\n",
> +				    rc);
> +
> +		goto err_ne_pci_dev_disable;
> +	}
It seems weird that we need to disable the device before enabling it on 
the probe() method.
Why can't we just enable the device without disabling it?
> +
> +	rc = ne_pci_dev_enable(pdev, ne_pci_dev);
> +	if (rc < 0) {
> +		dev_err_ratelimited(&pdev->dev,
> +				    "Failure in ne_pci_dev enable [rc=%d]\n",
> +				    rc);
> +
> +		goto err_ne_pci_dev_enable;
> +	}
> +
> +	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
> +	init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
> +	INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
> +	mutex_init(&ne_pci_dev->enclaves_list_mutex);
> +	mutex_init(&ne_pci_dev->pci_dev_mutex);
> +
> +	pci_set_drvdata(pdev, ne_pci_dev);
If you would have pci_set_drvdata() as one of the first operations in 
ne_probe(), then you could have avoided
passing both struct pci_dev  and struct ne_pci_dev parameters to 
ne_setup_msix(), ne_pci_dev_enable() and ne_pci_dev_disable().
Which would have been a bit more elegant.
> +
> +	return 0;
> +
> +err_ne_pci_dev_enable:
> +err_ne_pci_dev_disable:
> +	pci_free_irq_vectors(pdev);
> +err_setup_msix:
> +	pci_iounmap(pdev, ne_pci_dev->iomem_base);
> +err_iomap:
> +	pci_release_regions(pdev);
> +err_req_regions:
> +	pci_disable_device(pdev);
> +err_pci_enable_dev:
> +	kzfree(ne_pci_dev);
An empty new-line is appropriate here.
To separate the return statement from the cleanup logic.
> +	return rc;
> +}
> +
> +static void ne_remove(struct pci_dev *pdev)
> +{
> +	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
> +
> +	if (!ne_pci_dev || !ne_pci_dev->iomem_base)
> +		return;
Why is this condition necessary?
The ne_remove() function should be called only in case ne_probe() succeeded.
In that case, both ne_pci_dev and ne_pci_dev->iomem_base should be non-NULL.
> +
> +	ne_pci_dev_disable(pdev, ne_pci_dev);
> +
> +	pci_set_drvdata(pdev, NULL);
> +
> +	pci_free_irq_vectors(pdev);
> +
> +	pci_iounmap(pdev, ne_pci_dev->iomem_base);
> +
> +	kzfree(ne_pci_dev);
> +
> +	pci_release_regions(pdev);
> +
> +	pci_disable_device(pdev);
You should aspire to keep ne_remove() order of operations to be the 
reverse order of operations done in ne_probe().
Which would also nicely match the order of operations done in ne_probe() 
cleanup.
i.e. The following order:

pci_set_drvdata();
ne_pci_dev_disable();
pci_free_irq_vectors();
pci_iounmap();
pci_release_regions();
pci_disable_device()
kzfree();

-Liran
Paraschiv, Andra-Irina April 29, 2020, 4:31 p.m. UTC | #2
On 25/04/2020 17:25, Liran Alon wrote:
>
> On 21/04/2020 21:41, Andra Paraschiv wrote:
>> +
>> +/**
>> + * ne_setup_msix - Setup MSI-X vectors for the PCI device.
>> + *
>> + * @pdev: PCI device to setup the MSI-X for.
>> + * @ne_pci_dev: PCI device private data structure.
>> + *
>> + * @returns: 0 on success, negative return value on failure.
>> + */
>> +static int ne_setup_msix(struct pci_dev *pdev, struct ne_pci_dev 
>> *ne_pci_dev)
>> +{
>> +    int nr_vecs = 0;
>> +    int rc = -EINVAL;
>> +
>> +    BUG_ON(!ne_pci_dev);
> This kind of defensive programming does not align with Linux coding 
> convention.
> I think these BUG_ON() conditions should be removed.

I replaced with WARN_ON here and in the other places in the codebase.

>> +
>> +    nr_vecs = pci_msix_vec_count(pdev);
>> +    if (nr_vecs < 0) {
>> +        rc = nr_vecs;
>> +
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in getting vec count [rc=%d]\n",
>> +                    rc);
>> +
>> +        return rc;
>> +    }
>> +
>> +    rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
>> +    if (rc < 0) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in alloc MSI-X vecs [rc=%d]\n",
>> +                    rc);
>> +
>> +        goto err_alloc_irq_vecs;
> You should just replace this with "return rc;" as no cleanup is 
> required here.

Done.

>> +    }
>> +
>> +    return 0;
>> +
>> +err_alloc_irq_vecs:
>> +    return rc;
>> +}
>> +
>> +/**
>> + * ne_pci_dev_enable - Select PCI device version and enable it.
>> + *
>> + * @pdev: PCI device to select version for and then enable.
>> + * @ne_pci_dev: PCI device private data structure.
>> + *
>> + * @returns: 0 on success, negative return value on failure.
>> + */
>> +static int ne_pci_dev_enable(struct pci_dev *pdev,
>> +                 struct ne_pci_dev *ne_pci_dev)
>> +{
>> +    u8 dev_enable_reply = 0;
>> +    u16 dev_version_reply = 0;
>> +
>> +    BUG_ON(!pdev);
>> +    BUG_ON(!ne_pci_dev);
>> +    BUG_ON(!ne_pci_dev->iomem_base);
> Same.
>> +
>> +    iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
>> +
>> +    dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
>> +    if (dev_version_reply != NE_VERSION_MAX) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in pci dev version cmd\n");
>> +
>> +        return -EIO;
>> +    }
>> +
>> +    iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
>> +
>> +    dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
>> +    if (dev_enable_reply != NE_ENABLE_ON) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in pci dev enable cmd\n");
>> +
>> +        return -EIO;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/**
>> + * ne_pci_dev_disable - Disable PCI device.
>> + *
>> + * @pdev: PCI device to disable.
>> + * @ne_pci_dev: PCI device private data structure.
>> + *
>> + * @returns: 0 on success, negative return value on failure.
>> + */
>> +static int ne_pci_dev_disable(struct pci_dev *pdev,
>> +                  struct ne_pci_dev *ne_pci_dev)
>> +{
>> +    u8 dev_disable_reply = 0;
>> +
>> +    BUG_ON(!pdev);
>> +    BUG_ON(!ne_pci_dev);
>> +    BUG_ON(!ne_pci_dev->iomem_base);
> Same.
>> +
>> +    iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
>> +
>> +    /*
>> +     * TODO: Check for NE_ENABLE_OFF in a loop, to handle cases when 
>> the
>> +     * device state is not immediately set to disabled and going 
>> through a
>> +     * transitory state of disabling.
>> +     */
>> +    dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
>> +    if (dev_disable_reply != NE_ENABLE_OFF) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in pci dev disable cmd\n");
>> +
>> +        return -EIO;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int ne_probe(struct pci_dev *pdev, const struct pci_device_id 
>> *id)
>> +{
>> +    struct ne_pci_dev *ne_pci_dev = NULL;
>> +    int rc = -EINVAL;
> Unnecessary variable initialization.
> ne_pci_dev and rc are initialized below always before they are used.

I would rather keep the initialization in place overall, to not have a 
mix of init and uninit vars, when needed.

>> +
>> +    ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
>> +    if (!ne_pci_dev)
>> +        return -ENOMEM;
>> +
>> +    rc = pci_enable_device(pdev);
>> +    if (rc < 0) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in pci dev enable [rc=%d]\n", rc);
>> +
> Why is this dev_err_ratelimited() instead of dev_err()?
> Same for the rest of error printing in this probe() method and other 
> places in this patch.

Just to avoid any misbehaving scenario, where would be way too many logs 
in a short timeframe. Here may not happen, but while handling PCI dev 
requests could be.

>> +        goto err_pci_enable_dev;
> I find it confusing that the error labels are named based on the 
> failure-case they are used,
> instead of the action they do (i.e. Unwind previous successful 
> operation that requires unwinding).
> This doesn't seem to match Linux kernel coding convention.
> It also created an unnecessary 2 labels pointing to the same place in 
> cleanup code.

Yep, that's better this way wrt the naming of the labels. I updated the 
gotos in the patch series.

>> +    }
>> +
>> +    rc = pci_request_regions_exclusive(pdev, "ne_pci_dev");
>> +    if (rc < 0) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in pci request regions [rc=%d]\n",
>> +                    rc);
>> +
>> +        goto err_req_regions;
>> +    }
>> +
>> +    ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
>> +    if (!ne_pci_dev->iomem_base) {
>> +        rc = -ENOMEM;
>> +
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in pci bar mapping [rc=%d]\n", rc);
>> +
>> +        goto err_iomap;
>> +    }
>> +
>> +    rc = ne_setup_msix(pdev, ne_pci_dev);
>> +    if (rc < 0) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in pci dev msix setup [rc=%d]\n",
>> +                    rc);
>> +
>> +        goto err_setup_msix;
>> +    }
>> +
>> +    rc = ne_pci_dev_disable(pdev, ne_pci_dev);
>> +    if (rc < 0) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in ne_pci_dev disable [rc=%d]\n",
>> +                    rc);
>> +
>> +        goto err_ne_pci_dev_disable;
>> +    }
> It seems weird that we need to disable the device before enabling it 
> on the probe() method.
> Why can't we just enable the device without disabling it?

The pci dev disable call cleanups the internal state of the device and 
terminates any running / "dangling" enclaves; here it is included just 
in case any remaining state from a previous PCI device use. The below 
enable call would fail in that case, though.

>> +
>> +    rc = ne_pci_dev_enable(pdev, ne_pci_dev);
>> +    if (rc < 0) {
>> +        dev_err_ratelimited(&pdev->dev,
>> +                    "Failure in ne_pci_dev enable [rc=%d]\n",
>> +                    rc);
>> +
>> +        goto err_ne_pci_dev_enable;
>> +    }
>> +
>> +    atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
>> +    init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
>> +    INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
>> +    mutex_init(&ne_pci_dev->enclaves_list_mutex);
>> +    mutex_init(&ne_pci_dev->pci_dev_mutex);
>> +
>> +    pci_set_drvdata(pdev, ne_pci_dev);
> If you would have pci_set_drvdata() as one of the first operations in 
> ne_probe(), then you could have avoided
> passing both struct pci_dev  and struct ne_pci_dev parameters to 
> ne_setup_msix(), ne_pci_dev_enable() and ne_pci_dev_disable().
> Which would have been a bit more elegant.

Fair point. I moved pci_set_drvdata() upper in the logic and updated the 
signature of the functions to only include the pci_dev parameter.

>> +
>> +    return 0;
>> +
>> +err_ne_pci_dev_enable:
>> +err_ne_pci_dev_disable:
>> +    pci_free_irq_vectors(pdev);
>> +err_setup_msix:
>> +    pci_iounmap(pdev, ne_pci_dev->iomem_base);
>> +err_iomap:
>> +    pci_release_regions(pdev);
>> +err_req_regions:
>> +    pci_disable_device(pdev);
>> +err_pci_enable_dev:
>> +    kzfree(ne_pci_dev);
> An empty new-line is appropriate here.
> To separate the return statement from the cleanup logic.

Done.

>> +    return rc;
>> +}
>> +
>> +static void ne_remove(struct pci_dev *pdev)
>> +{
>> +    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
>> +
>> +    if (!ne_pci_dev || !ne_pci_dev->iomem_base)
>> +        return;
> Why is this condition necessary?
> The ne_remove() function should be called only in case ne_probe() 
> succeeded.
> In that case, both ne_pci_dev and ne_pci_dev->iomem_base should be 
> non-NULL.

Correct, that shouldn't happen.

Just for early exit in case of bad behavior.

>> +
>> +    ne_pci_dev_disable(pdev, ne_pci_dev);
>> +
>> +    pci_set_drvdata(pdev, NULL);
>> +
>> +    pci_free_irq_vectors(pdev);
>> +
>> +    pci_iounmap(pdev, ne_pci_dev->iomem_base);
>> +
>> +    kzfree(ne_pci_dev);
>> +
>> +    pci_release_regions(pdev);
>> +
>> +    pci_disable_device(pdev);
> You should aspire to keep ne_remove() order of operations to be the 
> reverse order of operations done in ne_probe().
> Which would also nicely match the order of operations done in 
> ne_probe() cleanup.
> i.e. The following order:
>
> pci_set_drvdata();
> ne_pci_dev_disable();
> pci_free_irq_vectors();
> pci_iounmap();
> pci_release_regions();
> pci_disable_device()
> kzfree();

I updated the order of operations.


Thanks for review, Liran.

Andra





Amazon Development Center (Romania) S.R.L. registered office: 27A Sf. Lazar Street, UBC5, floor 2, Iasi, Iasi County, 700045, Romania. Registered in Romania. Registration number J22/2621/2005.
diff mbox series

Patch

diff --git a/drivers/virt/amazon/nitro_enclaves/ne_pci_dev.c b/drivers/virt/amazon/nitro_enclaves/ne_pci_dev.c
new file mode 100644
index 000000000000..8fbee95ea291
--- /dev/null
+++ b/drivers/virt/amazon/nitro_enclaves/ne_pci_dev.c
@@ -0,0 +1,278 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Nitro Enclaves (NE) PCI device driver. */
+
+#include <linux/bug.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/nitro_enclaves.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "ne_misc_dev.h"
+#include "ne_pci_dev.h"
+
+#define DEFAULT_TIMEOUT_MSECS (120000) // 120 sec
+
+static const struct pci_device_id ne_pci_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, ne_pci_ids);
+
+/**
+ * ne_setup_msix - Setup MSI-X vectors for the PCI device.
+ *
+ * @pdev: PCI device to setup the MSI-X for.
+ * @ne_pci_dev: PCI device private data structure.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_setup_msix(struct pci_dev *pdev, struct ne_pci_dev *ne_pci_dev)
+{
+	int nr_vecs = 0;
+	int rc = -EINVAL;
+
+	BUG_ON(!ne_pci_dev);
+
+	nr_vecs = pci_msix_vec_count(pdev);
+	if (nr_vecs < 0) {
+		rc = nr_vecs;
+
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in getting vec count [rc=%d]\n",
+				    rc);
+
+		return rc;
+	}
+
+	rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
+	if (rc < 0) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in alloc MSI-X vecs [rc=%d]\n",
+				    rc);
+
+		goto err_alloc_irq_vecs;
+	}
+
+	return 0;
+
+err_alloc_irq_vecs:
+	return rc;
+}
+
+/**
+ * ne_pci_dev_enable - Select PCI device version and enable it.
+ *
+ * @pdev: PCI device to select version for and then enable.
+ * @ne_pci_dev: PCI device private data structure.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_pci_dev_enable(struct pci_dev *pdev,
+			     struct ne_pci_dev *ne_pci_dev)
+{
+	u8 dev_enable_reply = 0;
+	u16 dev_version_reply = 0;
+
+	BUG_ON(!pdev);
+	BUG_ON(!ne_pci_dev);
+	BUG_ON(!ne_pci_dev->iomem_base);
+
+	iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
+
+	dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
+	if (dev_version_reply != NE_VERSION_MAX) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in pci dev version cmd\n");
+
+		return -EIO;
+	}
+
+	iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
+
+	dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
+	if (dev_enable_reply != NE_ENABLE_ON) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in pci dev enable cmd\n");
+
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ne_pci_dev_disable - Disable PCI device.
+ *
+ * @pdev: PCI device to disable.
+ * @ne_pci_dev: PCI device private data structure.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_pci_dev_disable(struct pci_dev *pdev,
+			      struct ne_pci_dev *ne_pci_dev)
+{
+	u8 dev_disable_reply = 0;
+
+	BUG_ON(!pdev);
+	BUG_ON(!ne_pci_dev);
+	BUG_ON(!ne_pci_dev->iomem_base);
+
+	iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
+
+	/*
+	 * TODO: Check for NE_ENABLE_OFF in a loop, to handle cases when the
+	 * device state is not immediately set to disabled and going through a
+	 * transitory state of disabling.
+	 */
+	dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
+	if (dev_disable_reply != NE_ENABLE_OFF) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in pci dev disable cmd\n");
+
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int ne_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct ne_pci_dev *ne_pci_dev = NULL;
+	int rc = -EINVAL;
+
+	ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
+	if (!ne_pci_dev)
+		return -ENOMEM;
+
+	rc = pci_enable_device(pdev);
+	if (rc < 0) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in pci dev enable [rc=%d]\n", rc);
+
+		goto err_pci_enable_dev;
+	}
+
+	rc = pci_request_regions_exclusive(pdev, "ne_pci_dev");
+	if (rc < 0) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in pci request regions [rc=%d]\n",
+				    rc);
+
+		goto err_req_regions;
+	}
+
+	ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
+	if (!ne_pci_dev->iomem_base) {
+		rc = -ENOMEM;
+
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in pci bar mapping [rc=%d]\n", rc);
+
+		goto err_iomap;
+	}
+
+	rc = ne_setup_msix(pdev, ne_pci_dev);
+	if (rc < 0) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in pci dev msix setup [rc=%d]\n",
+				    rc);
+
+		goto err_setup_msix;
+	}
+
+	rc = ne_pci_dev_disable(pdev, ne_pci_dev);
+	if (rc < 0) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in ne_pci_dev disable [rc=%d]\n",
+				    rc);
+
+		goto err_ne_pci_dev_disable;
+	}
+
+	rc = ne_pci_dev_enable(pdev, ne_pci_dev);
+	if (rc < 0) {
+		dev_err_ratelimited(&pdev->dev,
+				    "Failure in ne_pci_dev enable [rc=%d]\n",
+				    rc);
+
+		goto err_ne_pci_dev_enable;
+	}
+
+	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
+	init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
+	INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
+	mutex_init(&ne_pci_dev->enclaves_list_mutex);
+	mutex_init(&ne_pci_dev->pci_dev_mutex);
+
+	pci_set_drvdata(pdev, ne_pci_dev);
+
+	return 0;
+
+err_ne_pci_dev_enable:
+err_ne_pci_dev_disable:
+	pci_free_irq_vectors(pdev);
+err_setup_msix:
+	pci_iounmap(pdev, ne_pci_dev->iomem_base);
+err_iomap:
+	pci_release_regions(pdev);
+err_req_regions:
+	pci_disable_device(pdev);
+err_pci_enable_dev:
+	kzfree(ne_pci_dev);
+	return rc;
+}
+
+static void ne_remove(struct pci_dev *pdev)
+{
+	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
+
+	if (!ne_pci_dev || !ne_pci_dev->iomem_base)
+		return;
+
+	ne_pci_dev_disable(pdev, ne_pci_dev);
+
+	pci_set_drvdata(pdev, NULL);
+
+	pci_free_irq_vectors(pdev);
+
+	pci_iounmap(pdev, ne_pci_dev->iomem_base);
+
+	kzfree(ne_pci_dev);
+
+	pci_release_regions(pdev);
+
+	pci_disable_device(pdev);
+}
+
+/*
+ * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
+ * needed.
+ */
+struct pci_driver ne_pci_driver = {
+	.name		= "ne_pci_dev",
+	.id_table	= ne_pci_ids,
+	.probe		= ne_probe,
+	.remove		= ne_remove,
+};