[v2] cxlflash: Base support for IBM CXL Flash Adapter

Message ID	1432332894-11199-1-git-send-email-mrochs@linux.vnet.ibm.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-scsi-owner@kernel.org> Gateway: Authorized Use Only! Violators will be prosecuted for <linux-scsi@vger.kernel.org> from <mrochs@linux.vnet.ibm.com>; Fri, 22 May 2015 16:16:07 -0600 Gateway: Authorized Use Only! Violators will be prosecuted; Fri, 22 May 2015 16:16:05 -0600 From: "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com> To: linux-scsi@vger.kernel.org, James.Bottomley@HansenPartnership.com, nab@linux-iscsi.org, brking@linux.vnet.ibm.com, hch@infradead.org Cc: mikey@neuling.org, imunsie@au1.ibm.com, "Manoj N. Kumar" <manoj@linux.vnet.ibm.com> Subject: [PATCH v2] cxlflash: Base support for IBM CXL Flash Adapter Date: Fri, 22 May 2015 17:14:54 -0500 Message-Id: <1432332894-11199-1-git-send-email-mrochs@linux.vnet.ibm.com> Sender: linux-scsi-owner@vger.kernel.org Precedence: bulk

Message ID

1432332894-11199-1-git-send-email-mrochs@linux.vnet.ibm.com (mailing list archive)

State

New, archived

Headers

From: "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>
To: linux-scsi@vger.kernel.org, James.Bottomley@HansenPartnership.com,
	nab@linux-iscsi.org, brking@linux.vnet.ibm.com, hch@infradead.org
Cc: mikey@neuling.org, imunsie@au1.ibm.com,
	"Manoj N. Kumar" <manoj@linux.vnet.ibm.com>
Subject: [PATCH v2] cxlflash: Base support for IBM CXL Flash Adapter
Date: Fri, 22 May 2015 17:14:54 -0500
Message-Id: <1432332894-11199-1-git-send-email-mrochs@linux.vnet.ibm.com>
Sender: linux-scsi-owner@vger.kernel.org
Precedence: bulk

Commit Message

Matthew R. Ochs May 22, 2015, 10:14 p.m. UTC

SCSI device driver to support filesystem access on the IBM CXL Flash adapter.

Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
---
 drivers/scsi/Kconfig            |    1 +
 drivers/scsi/Makefile           |    1 +
 drivers/scsi/cxlflash/Kconfig   |   11 +
 drivers/scsi/cxlflash/Makefile  |    2 +
 drivers/scsi/cxlflash/common.h  |  180 ++++
 drivers/scsi/cxlflash/main.c    | 2242 +++++++++++++++++++++++++++++++++++++++
 drivers/scsi/cxlflash/main.h    |  111 ++
 drivers/scsi/cxlflash/sislite.h |  465 ++++++++
 8 files changed, 3013 insertions(+)
 create mode 100644 drivers/scsi/cxlflash/Kconfig
 create mode 100644 drivers/scsi/cxlflash/Makefile
 create mode 100644 drivers/scsi/cxlflash/common.h
 create mode 100644 drivers/scsi/cxlflash/main.c
 create mode 100644 drivers/scsi/cxlflash/main.h
 create mode 100755 drivers/scsi/cxlflash/sislite.h

Comments

Brian King May 29, 2015, 8:54 p.m. UTC | #1

Hi Matt,

Comments below...

-Brian


On 05/22/2015 05:14 PM, Matthew R. Ochs wrote:
> +++ b/drivers/scsi/cxlflash/common.h
> @@ -0,0 +1,180 @@
> +/*
> + * CXL Flash Device Driver
> + *
> + * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
> + *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
> + *
> + * Copyright (C) 2015 IBM Corporation
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _CXLFLASH_COMMON_H
> +#define _CXLFLASH_COMMON_H
> +
> +#include <linux/list.h>
> +#include <linux/types.h>
> +#include <scsi/scsi.h>
> +#include <scsi/scsi_device.h>
> +
> +
> +#define MAX_CONTEXT  CXLFLASH_MAX_CONTEXT       /* num contexts per afu */
> +
> +#define CXLFLASH_BLOCK_SIZE	4096	/* 4K blocks */
> +#define CXLFLASH_MAX_XFER_SIZE	16777216	/* 16MB transfer */
> +#define CXLFLASH_MAX_SECTORS	(CXLFLASH_MAX_XFER_SIZE/CXLFLASH_BLOCK_SIZE)
> +
> +#define NUM_RRQ_ENTRY    16     /* for master issued cmds */
> +#define MAX_RHT_PER_CONTEXT (PAGE_SIZE / sizeof(struct sisl_rht_entry))
> +
> +/* AFU command retry limit */
> +#define MC_RETRY_CNT         5	/* sufficient for SCSI check and
> +				   certain AFU errors */
> +
> +/* Command management definitions */
> +#define CXLFLASH_NUM_CMDS	(2 * CXLFLASH_MAX_CMDS)	/* Must be a pow2 for
> +							   alignment and more
> +							   efficient array
> +							   index derivation
> +							 */
> +
> +#define CXLFLASH_MAX_CMDS               16
> +#define CXLFLASH_MAX_CMDS_PER_LUN       CXLFLASH_MAX_CMDS
> +
> +/* Check for power of 2 at compile time */
> +#define NOT_POW2(_x) ((_x) && ((_x) & ((_x) - 1)))
> +#if NOT_POW2(CXLFLASH_NUM_CMDS)
> +#error "CXLFLASH_NUM_CMDS is not a power of 2!"
> +#endif

Can you use BUILD_BUG_ON_NOT_POWER_OF_2 in include/linux/bug.h for this instead?

> +
> +/* AFU defines a fixed size of 4K for command buffers (borrow 4K page define) */
> +#define CMD_BUFSIZE     SIZE_4K
> +
> +/* flags in IOA status area for host use */
> +#define B_DONE       0x01
> +#define B_ERROR      0x02	/* set with B_DONE */
> +#define B_TIMEOUT    0x04	/* set with B_DONE & B_ERROR */
> +
> +enum cxlflash_lr_state {
> +	LINK_RESET_INVALID,
> +	LINK_RESET_REQUIRED,
> +	LINK_RESET_COMPLETE
> +};
> +
> +enum cxlflash_init_state {
> +	INIT_STATE_NONE,
> +	INIT_STATE_AFU,
> +	INIT_STATE_PCI,
> +	INIT_STATE_SCSI
> +};
> +
> +/*
> + * Each context has its own set of resource handles that is visible
> + * only from that context.
> + */
> +
> +struct cxlflash_cfg {
> +	struct afu *afu;
> +	struct cxl_context *mcctx;
> +
> +	struct pci_dev *dev;
> +	struct pci_device_id *dev_id;
> +	struct Scsi_Host *host;
> +
> +	unsigned long cxlflash_regs_pci;
> +
> +	wait_queue_head_t reset_wait_q;
> +	wait_queue_head_t msi_wait_q;
> +	wait_queue_head_t eeh_wait_q;
> +
> +	struct work_struct work_q;
> +	enum cxlflash_init_state init_state;
> +	enum cxlflash_lr_state lr_state;
> +	int lr_port;
> +
> +	struct cxl_afu *cxl_afu;
> +
> +	struct pci_pool *cxlflash_cmd_pool;
> +	struct pci_dev *parent_dev;
> +
> +	wait_queue_head_t tmf_wait_q;
> +	wait_queue_head_t sync_wait_q;
> +	u8 tmf_active:1;
> +	u8 sync_active:1;
> +};
> +
> +struct afu_cmd {
> +	struct sisl_ioarcb rcb;	/* IOARCB (cache line aligned) */
> +	struct sisl_ioasa sa;	/* IOASA must follow IOARCB */
> +	spinlock_t slock;
> +	struct timer_list timer;
> +	char *buf;		/* per command buffer */
> +	struct afu *parent;
> +	int slot;
> +	atomic_t free;
> +	u8 special:1;
> +	u8 internal:1;
> +	u8 sync:1;
> +
> +	/* As per the SISLITE spec the IOARCB EA has to be 16-byte aligned.
> +	 * However for performance reasons the IOARCB/IOASA should be
> +	 * cache line aligned.
> +	 */
> +} __aligned(cache_line_size());
> +
> +struct afu {
> +	/* Stuff requiring alignment go first. */
> +
> +	u64 rrq_entry[NUM_RRQ_ENTRY];	/* 128B RRQ */
> +	/*
> +	 * Command & data for AFU commands.
> +	 */
> +	struct afu_cmd cmd[CXLFLASH_NUM_CMDS];
> +
> +	/* Beware of alignment till here. Preferably introduce new
> +	 * fields after this point
> +	 */
> +
> +	/* AFU HW */
> +	int afu_fd;
> +	struct cxl_ioctl_start_work work;
> +	volatile struct cxlflash_afu_map *afu_map;	/* entire MMIO map */
> +	volatile struct sisl_host_map *host_map;	/* MC host map */
> +	volatile struct sisl_ctrl_map *ctrl_map;	/* MC control map */
> +
> +	ctx_hndl_t ctx_hndl;	/* master's context handle */
> +	u64 *hrrq_start;
> +	u64 *hrrq_end;
> +	volatile u64 *hrrq_curr;
> +	bool toggle;
> +	u64 room;
> +	u64 hb;
> +	u32 cmd_couts;		/* Number of command checkouts */
> +	u32 internal_lun;	/* User-desired LUN mode for this AFU */
> +
> +	char version[8];
> +	u64 interface_version;
> +
> +	struct cxlflash_cfg *parent; /* Pointer back to parent cxlflash_cfg */
> +
> +};
> +
> +static inline u64 lun_to_lunid(u64 lun)
> +{
> +	u64 lun_id;
> +
> +	int_to_scsilun(lun, (struct scsi_lun *)&lun_id);
> +	return swab64(lun_id);
> +}
> +
> +int cxlflash_send_cmd(struct afu *, struct afu_cmd *);
> +void cxlflash_wait_resp(struct afu *, struct afu_cmd *);
> +int cxlflash_afu_reset(struct cxlflash_cfg *);
> +struct afu_cmd *cxlflash_cmd_checkout(struct afu *);
> +void cxlflash_cmd_checkin(struct afu_cmd *);
> +int cxlflash_afu_sync(struct afu *, ctx_hndl_t, res_hndl_t, u8);
> +#endif /* ifndef _CXLFLASH_COMMON_H */
> +
> diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
> new file mode 100644
> index 0000000..d16f8b2
> --- /dev/null
> +++ b/drivers/scsi/cxlflash/main.c
> @@ -0,0 +1,2242 @@
> +/*
> + * CXL Flash Device Driver
> + *
> + * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
> + *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
> + *
> + * Copyright (C) 2015 IBM Corporation
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +
> +#include <asm/unaligned.h>
> +
> +#include <misc/cxl.h>
> +
> +#include <scsi/scsi_cmnd.h>
> +#include <scsi/scsi_host.h>
> +
> +#include "main.h"
> +#include "sislite.h"
> +#include "common.h"
> +
> +MODULE_DESCRIPTION(CXLFLASH_ADAPTER_NAME);
> +MODULE_AUTHOR("Manoj N. Kumar <manoj@linux.vnet.ibm.com>");
> +MODULE_AUTHOR("Matthew R. Ochs <mrochs@linux.vnet.ibm.com>");
> +MODULE_LICENSE("GPL");
> +
> +
> +/**
> + * cxlflash_cmd_checkout() - checks out an AFU command
> + * @afu:	AFU to checkout from.
> + *
> + * Commands are checked out in a round-robin fashion. Note that since
> + * the command pool is larger than the hardware queue, the majority of
> + * times we will only loop once or twice before getting a command. The
> + * buffer and CDB within the command are initialized (zeroed) prior to
> + * returning.
> + *
> + * Return: The checked out command or NULL when command pool is empty.
> + */
> +struct afu_cmd *cxlflash_cmd_checkout(struct afu *afu)
> +{
> +	int k, dec = CXLFLASH_NUM_CMDS;
> +	struct afu_cmd *cmd;
> +
> +	while (dec--) {
> +		k = (afu->cmd_couts++ & (CXLFLASH_NUM_CMDS - 1));
> +
> +		cmd = &afu->cmd[k];
> +
> +		if (!atomic_dec_if_positive(&cmd->free)) {
> +			pr_debug("%s: returning found index=%d\n",
> +				 __func__, cmd->slot);
> +			memset(cmd->buf, 0, CMD_BUFSIZE);
> +			memset(cmd->rcb.cdb, 0, sizeof(cmd->rcb.cdb));
> +			return cmd;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +/**
> + * cxlflash_cmd_checkin() - checks in an AFU command
> + * @cmd:	AFU command to checkin.
> + *
> + * Safe to pass commands that have already been checked in. Several
> + * internal tracking fields are reset as part of the checkin.
> + */
> +void cxlflash_cmd_checkin(struct afu_cmd *cmd)
> +{
> +	if (unlikely(atomic_inc_return(&cmd->free) != 1)) {
> +		pr_err("%s: Freeing cmd (%d) that is not in use!\n",
> +		       __func__, cmd->slot);
> +		return;
> +	}

Seems like it's possible for another thread to grab the cmd at this point and
start using it before the re-init below occurs, such that the following
writes could happen when you don't want them to. If you re-init the command
before setting the free bit you should be ok.

> +
> +	cmd->special = 0;
> +	cmd->internal = false;
> +	cmd->sync = false;
> +	cmd->rcb.timeout = 0;
> +
> +	pr_debug("%s: releasing cmd index=%d\n", __func__, cmd->slot);
> +}
> +
> +/**
> + * process_cmd_err() - command error handler
> + * @cmd:	AFU command that experienced the error.
> + * @scp:	SCSI command associated with the AFU command in error.
> + *
> + * Translates error bits from AFU command to SCSI command results.
> + */
> +static void process_cmd_err(struct afu_cmd *cmd, struct scsi_cmnd *scp)
> +{
> +	struct sisl_ioarcb *ioarcb;
> +	struct sisl_ioasa *ioasa;
> +
> +	if (unlikely(!cmd))
> +		return;
> +
> +	ioarcb = &(cmd->rcb);
> +	ioasa = &(cmd->sa);
> +
> +	if (ioasa->rc.flags & SISL_RC_FLAGS_UNDERRUN) {
> +		pr_debug("%s: cmd underrun cmd = %p scp = %p\n",
> +			 __func__, cmd, scp);
> +		scp->result = (DID_ERROR << 16);
> +	}
> +
> +	if (ioasa->rc.flags & SISL_RC_FLAGS_OVERRUN) {
> +		pr_debug("%s: cmd underrun cmd = %p scp = %p\n",
> +			 __func__, cmd, scp);
> +		scp->result = (DID_ERROR << 16);
> +	}
> +
> +	pr_debug("%s: cmd failed afu_rc=%d scsi_rc=%d fc_rc=%d "
> +		 "afu_extra=0x%X, scsi_entra=0x%X, fc_extra=0x%X\n",
> +		 __func__, ioasa->rc.afu_rc, ioasa->rc.scsi_rc,
> +		 ioasa->rc.fc_rc, ioasa->afu_extra, ioasa->scsi_extra,
> +		 ioasa->fc_extra);
> +
> +	if (ioasa->rc.scsi_rc) {
> +		/* We have a SCSI status */
> +		if (ioasa->rc.flags & SISL_RC_FLAGS_SENSE_VALID)
> +			memcpy(scp->sense_buffer, ioasa->sense_data,
> +			       SISL_SENSE_DATA_LEN);
> +		scp->result = ioasa->rc.scsi_rc | (DID_ERROR << 16);

If there is valid sense data here you don't want to set DID_ERROR. By setting
DID_ERROR here, scsi_decide_disposition won't use the sense data to determine
what EH action to perform.

> +	}
> +
> +	/*
> +	 * We encountered an error. Set scp->result based on nature
> +	 * of error.
> +	 */
> +	if (ioasa->rc.fc_rc) {
> +		/* We have an FC status */
> +		switch (ioasa->rc.fc_rc) {
> +		case SISL_FC_RC_RESIDERR:
> +			/* Resid mismatch between adapter and device */
> +		case SISL_FC_RC_TGTABORT:
> +		case SISL_FC_RC_ABORTOK:
> +		case SISL_FC_RC_ABORTFAIL:
> +		case SISL_FC_RC_LINKDOWN:
> +		case SISL_FC_RC_NOLOGI:
> +		case SISL_FC_RC_ABORTPEND:
> +			scp->result = (DID_IMM_RETRY << 16);

So if someone comes and pulls the cables on the card you are going to
return DID_IMM_RETRY for all I/O sent? 

> +			break;
> +		case SISL_FC_RC_RESID:
> +			/* This indicates an FCP resid underrun */
> +			if (!(ioasa->rc.flags & SISL_RC_FLAGS_OVERRUN)) {
> +				/* If the SISL_RC_FLAGS_OVERRUN flag was set,
> +				 * then we will handle this error else where.
> +				 * If not then we must handle it here.
> +				 * This is probably an AFU bug. We will
> +				 * attempt a retry to see if that resolves it.
> +				 */
> +				scp->result = (DID_IMM_RETRY << 16);

DID_IMM_RETRY probably isn't what you want. This will force a retry and NOT
decrement the retry counter, so if it is an AFU bug you'd better be sure
there is no way this is a hard condition, otherwise you'll retry until we
hit the timeout. Returning DID_ERROR might be better.

> +			}
> +			break;
> +		case SISL_FC_RC_WRABORTPEND:
> +		case SISL_FC_RC_NOEXP:
> +		case SISL_FC_RC_INUSE:
> +			scp->result = (DID_ERROR << 16);
> +			break;
> +		}
> +	}
> +
> +	if (ioasa->rc.afu_rc) {
> +		/* We have an AFU error */
> +		switch (ioasa->rc.afu_rc) {
> +		case SISL_AFU_RC_NO_CHANNELS:
> +			scp->result = (DID_MEDIUM_ERROR << 16);
> +			break;
> +		case SISL_AFU_RC_DATA_DMA_ERR:
> +			switch (ioasa->afu_extra) {
> +			case SISL_AFU_DMA_ERR_PAGE_IN:
> +				/* Retry */
> +				scp->result = (DID_IMM_RETRY << 16);
> +				break;
> +			case SISL_AFU_DMA_ERR_INVALID_EA:
> +			default:
> +				scp->result = (DID_ERROR << 16);
> +			}
> +			break;
> +		case SISL_AFU_RC_OUT_OF_DATA_BUFS:
> +			/* Retry */
> +			scp->result = (DID_ALLOC_FAILURE << 16);
> +			break;
> +		default:
> +			scp->result = (DID_ERROR << 16);
> +		}
> +	}
> +}
> +
> +/**
> + * cmd_complete() - command completion handler
> + * @cmd:	AFU command that has completed.
> + *
> + * Prepares and submits command that has either completed or timed out to
> + * the SCSI stack. Checks AFU command back into command pool.
> + */
> +static void cmd_complete(struct afu_cmd *cmd)
> +{
> +	struct scsi_cmnd *scp;
> +	struct afu *afu = cmd->parent;
> +	struct cxlflash_cfg *cfg = afu->parent;
> +
> +	cmd->sa.host_use_b[0] |= B_DONE;

This is done with no locking, but is not an atomic operation. Are there
any cases where two simultaneous writers of this field could result
in losing the setting of a bit? For example, they both read at the same time
and read zero, then each writer does their store, so the last one wins.


> +
> +	/* already stopped if timer fired */
> +	del_timer(&cmd->timer);
> +
> +	if (cmd->rcb.scp) {
> +		scp = cmd->rcb.scp;
> +		if (cmd->sa.rc.afu_rc || cmd->sa.rc.scsi_rc ||
> +		    cmd->sa.rc.fc_rc)
> +			process_cmd_err(cmd, scp);
> +		else
> +			scp->result = (DID_OK << 16);
> +
> +		pr_debug("%s: calling scsi_set_resid, scp=%p "
> +			 "result=%X resid=%d\n", __func__,
> +			 cmd->rcb.scp, scp->result, cmd->sa.resid);
> +
> +		scsi_set_resid(scp, cmd->sa.resid);
> +		scsi_dma_unmap(scp);
> +		scp->scsi_done(scp);
> +		cmd->rcb.scp = NULL;
> +		if (cmd->special) {
> +			cfg->tmf_active = false;
> +			wake_up_all(&cfg->tmf_wait_q);
> +		}
> +	}
> +	if (cmd->sync) {
> +		cfg->sync_active = false;
> +		wake_up_all(&cfg->sync_wait_q);
> +	}
> +
> +	/* Done with command */
> +	cxlflash_cmd_checkin(cmd);
> +}
> +
> +/**
> + * send_tmf() - sends a Task Management Function (TMF)
> + * @afu:	AFU to checkout from.
> + * @scp:	SCSI command from stack.
> + * @tmfcmd:	TMF command to send.
> + *
> + * Return:
> + *	0 on success
> + *	SCSI_MLQUEUE_HOST_BUSY when host is busy
> + */
> +static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
> +{
> +	struct afu_cmd *cmd;
> +
> +	u32 port_sel = scp->device->channel + 1;
> +	short lflag = 0;
> +	struct Scsi_Host *host = scp->device->host;
> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
> +	int rc = 0;
> +
> +	wait_event(cfg->tmf_wait_q, !cfg->tmf_active);
> +
> +	cmd = cxlflash_cmd_checkout(afu);
> +	if (unlikely(!cmd)) {
> +		pr_err("%s: could not get a free command\n", __func__);
> +		rc = SCSI_MLQUEUE_HOST_BUSY;
> +		goto out;
> +	}
> +
> +	cmd->rcb.ctx_id = afu->ctx_hndl;
> +	cmd->rcb.port_sel = port_sel;
> +	cmd->rcb.lun_id = lun_to_lunid(scp->device->lun);
> +
> +	lflag = SISL_REQ_FLAGS_TMF_CMD;
> +
> +	cmd->rcb.req_flags = (SISL_REQ_FLAGS_PORT_LUN_ID |
> +				SISL_REQ_FLAGS_SUP_UNDERRUN | lflag);
> +
> +	/* Stash the scp in the reserved field, for reuse during interrupt */
> +	cmd->rcb.scp = scp;
> +	cmd->special = 0x1;
> +	cfg->tmf_active = true;
> +
> +	cmd->sa.host_use_b[1] = 0;	/* reset retry cnt */
> +
> +	/* Copy the CDB from the cmd passed in */
> +	memcpy(cmd->rcb.cdb, &tmfcmd, sizeof(tmfcmd));
> +
> +	/* Send the command */
> +	rc = cxlflash_send_cmd(afu, cmd);
> +	if (!rc)
> +		wait_event(cfg->tmf_wait_q, !cfg->tmf_active);
> +out:
> +	return rc;
> +
> +}
> +
> +/**
> + * cxlflash_driver_info() - information handler for this host driver
> + * @host:	SCSI host associated with device.
> + *
> + * Return: A string describing the device.
> + */
> +static const char *cxlflash_driver_info(struct Scsi_Host *host)
> +{
> +	return CXLFLASH_ADAPTER_NAME;
> +}
> +
> +/**
> + * cxlflash_queuecommand() - sends a mid-layer request
> + * @host:	SCSI host associated with device.
> + * @scp:	SCSI command to send.
> + *
> + * Return:
> + *	0 on success
> + *	SCSI_MLQUEUE_DEVICE_BUSY when device is busy
> + *	SCSI_MLQUEUE_HOST_BUSY when host is busy
> + */
> +static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
> +{
> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
> +	struct afu *afu = cfg->afu;
> +	struct pci_dev *pdev = cfg->dev;
> +	struct afu_cmd *cmd;
> +	u32 port_sel = scp->device->channel + 1;
> +	int nseg, i, ncount;
> +	struct scatterlist *sg;
> +	short lflag = 0;
> +	int rc = 0;
> +
> +	pr_debug("%s: (scp=%p) %d/%d/%d/%llu cdb=(%08X-%08X-%08X-%08X)\n",
> +		 __func__, scp, host->host_no, scp->device->channel,
> +		 scp->device->id, scp->device->lun,
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
> +
> +	wait_event(cfg->tmf_wait_q, !cfg->tmf_active);

You don't seem to be doing any locking or barrier semantics around the setting
or checking of tmf_active. Additionally, since there is no locking, and it's a bit
field, it will take a read/modify/write to the byte it's in, potentially messing
with the other bit fields if you have multiple concurrent bit changes going on without locking.

> +
> +	cmd = cxlflash_cmd_checkout(afu);
> +	if (unlikely(!cmd)) {
> +		pr_err("%s: could not get a free command\n", __func__);
> +		rc = SCSI_MLQUEUE_HOST_BUSY;
> +		goto out;
> +	}
> +
> +	cmd->rcb.ctx_id = afu->ctx_hndl;
> +	cmd->rcb.port_sel = port_sel;
> +	cmd->rcb.lun_id = lun_to_lunid(scp->device->lun);
> +
> +	if (scp->sc_data_direction == DMA_TO_DEVICE)
> +		lflag = SISL_REQ_FLAGS_HOST_WRITE;
> +	else
> +		lflag = SISL_REQ_FLAGS_HOST_READ;
> +
> +	cmd->rcb.req_flags = (SISL_REQ_FLAGS_PORT_LUN_ID |
> +				SISL_REQ_FLAGS_SUP_UNDERRUN | lflag);
> +
> +	/* Stash the scp in the reserved field, for reuse during interrupt */
> +	cmd->rcb.scp = scp;
> +
> +	cmd->sa.host_use_b[1] = 0;	/* reset retry cnt */
> +
> +	nseg = scsi_dma_map(scp);
> +	if (unlikely(nseg < 0)) {
> +		dev_err(&pdev->dev, "%s: Fail DMA map! nseg=%d\n",
> +			__func__, nseg);
> +		rc = SCSI_MLQUEUE_DEVICE_BUSY;

This should probably be SCSI_MLQUEUE_HOST_BUSY instead, since it would be
host resources you are short on and not device resources.

> +		goto out;
> +	}
> +
> +	ncount = scsi_sg_count(scp);
> +	scsi_for_each_sg(scp, sg, ncount, i) {
> +		cmd->rcb.data_len = (sg_dma_len(sg));
> +		cmd->rcb.data_ea = (sg_dma_address(sg));

What's up with the extra parentheses?

> +	}
> +
> +	/* Copy the CDB from the scsi_cmnd passed in */
> +	memcpy(cmd->rcb.cdb, scp->cmnd, sizeof(cmd->rcb.cdb));
> +
> +	/* Send the command */
> +	rc = cxlflash_send_cmd(afu, cmd);
> +
> +out:
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_eh_device_reset_handler() - reset a single LUN
> + * @scp:	SCSI command to send.
> + *
> + * Return:
> + *	SUCCESS as defined in scsi/scsi.h
> + *	FAILED as defined in scsi/scsi.h
> + */
> +static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
> +{
> +	int rc = SUCCESS;
> +	struct Scsi_Host *host = scp->device->host;
> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
> +	struct afu *afu = cfg->afu;
> +
> +	pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
> +		 "cdb=(%08X-%08X-%08X-%08X)\n", __func__, scp,
> +		 host->host_no, scp->device->channel,
> +		 scp->device->id, scp->device->lun,
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
> +
> +	scp->result = (DID_OK << 16);

Don't think this should be needed. scsi eh will requeue or fail the
command as appropriate.

> +	send_tmf(afu, scp, TMF_LUN_RESET);
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_eh_host_reset_handler() - reset the host adapter
> + * @scp:	SCSI command from stack identifying host.
> + *
> + * Return:
> + *	SUCCESS as defined in scsi/scsi.h
> + *	FAILED as defined in scsi/scsi.h
> + */
> +static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
> +{
> +	int rc = SUCCESS;
> +	int rcr = 0;
> +	struct Scsi_Host *host = scp->device->host;
> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
> +
> +	pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
> +		 "cdb=(%08X-%08X-%08X-%08X)\n", __func__, scp,
> +		 host->host_no, scp->device->channel,
> +		 scp->device->id, scp->device->lun,
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
> +
> +	scp->result = (DID_OK << 16);

Don't think this should be needed. scsi eh will requeue or fail the
command as appropriate.

> +	rcr = cxlflash_afu_reset(cfg);
> +	if (rcr == 0)
> +		rc = SUCCESS;
> +	else
> +		rc = FAILED;
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_change_queue_depth() - change the queue depth for the device
> + * @sdev:	SCSI device destined for queue depth change.
> + * @qdepth:	Requested queue depth value to set.
> + *
> + * The requested queue depth is capped to the maximum supported value.
> + *
> + * Return: The actual queue depth set.
> + */
> +static int cxlflash_change_queue_depth(struct scsi_device *sdev, int qdepth)
> +{
> +
> +	if (qdepth > CXLFLASH_MAX_CMDS_PER_LUN)
> +		qdepth = CXLFLASH_MAX_CMDS_PER_LUN;
> +
> +	scsi_change_queue_depth(sdev, qdepth);
> +	return sdev->queue_depth;
> +}
> +
> +/**
> + * cxlflash_show_port_status() - queries and presents the current port status
> + * @dev:	Generic device associated with the host owning the port.
> + * @attr:	Device attribute representing the port.
> + * @buf:	Buffer of length PAGE_SIZE to report back port status in ASCII.
> + *
> + * Return: The size of the ASCII string returned in @buf.
> + */
> +static ssize_t cxlflash_show_port_status(struct device *dev,
> +					 struct device_attribute *attr,
> +					 char *buf)
> +{
> +	struct Scsi_Host *shost = class_to_shost(dev);
> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)shost->hostdata;
> +	struct afu *afu = cfg->afu;
> +
> +	char *disp_status;
> +	int rc;
> +	u32 port;
> +	u64 status;
> +	volatile u64 *fc_regs;
> +
> +	rc = kstrtouint((attr->attr.name + 4), 10, &port);
> +	if (rc || (port > NUM_FC_PORTS))
> +		return 0;
> +
> +	fc_regs = &afu->afu_map->global.fc_regs[port][0];
> +	status =
> +	    (readq_be(&fc_regs[FC_MTIP_STATUS / 8]) & FC_MTIP_STATUS_MASK);
> +
> +	if (status == FC_MTIP_STATUS_ONLINE)
> +		disp_status = "online";
> +	else if (status == FC_MTIP_STATUS_OFFLINE)
> +		disp_status = "offline";
> +	else
> +		disp_status = "unknown";
> +
> +	return snprintf(buf, PAGE_SIZE, "%s\n", disp_status);
> +}
> +
> +/**
> + * cxlflash_show_lun_mode() - presents the current LUN mode of the host
> + * @dev:	Generic device associated with the host.
> + * @attr:	Device attribute representing the lun mode.
> + * @buf:	Buffer of length PAGE_SIZE to report back the LUN mode in ASCII.
> + *
> + * Return: The size of the ASCII string returned in @buf.
> + */
> +static ssize_t cxlflash_show_lun_mode(struct device *dev,
> +				      struct device_attribute *attr, char *buf)
> +{
> +	struct Scsi_Host *shost = class_to_shost(dev);
> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)shost->hostdata;
> +	struct afu *afu = cfg->afu;
> +
> +	return snprintf(buf, PAGE_SIZE, "%u\n", afu->internal_lun);
> +}
> +
> +/**
> + * cxlflash_store_lun_mode() - sets the LUN mode of the host
> + * @dev:	Generic device associated with the host.
> + * @attr:	Device attribute representing the lun mode.
> + * @buf:	Buffer of length PAGE_SIZE containing the LUN mode in ASCII.
> + * @count:	Length of data resizing in @buf.
> + *
> + * The CXL Flash AFU supports a dummy LUN mode where the external
> + * links and storage are not required. Space on the FPGA is used
> + * to create 1 or 2 small LUNs which are presented to the system
> + * as if they were a normal storage device. This feature is useful
> + * during development and also provides manufacturing with a way
> + * to test the AFU without an actual device.
> + *
> + * 0 = external LUN[s] (default)
> + * 1 = internal LUN (1 x 64K, 512B blocks, id 0)
> + * 2 = internal LUN (1 x 64K, 4K blocks, id 0)
> + * 3 = internal LUN (2 x 32K, 512B blocks, ids 0,1)
> + * 4 = internal LUN (2 x 32K, 4K blocks, ids 0,1)
> + *
> + * Return: The size of the ASCII string returned in @buf.
> + */
> +static ssize_t cxlflash_store_lun_mode(struct device *dev,
> +				       struct device_attribute *attr,
> +				       const char *buf, size_t count)
> +{
> +	struct Scsi_Host *shost = class_to_shost(dev);
> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)shost->hostdata;
> +	struct afu *afu = cfg->afu;
> +	int rc;
> +	u32 lun_mode;
> +
> +	rc = kstrtouint(buf, 10, &lun_mode);
> +	if (!rc && (lun_mode < 5) && (lun_mode != afu->internal_lun)) {
> +		afu->internal_lun = lun_mode;
> +		cxlflash_afu_reset(cfg);
> +		scsi_scan_host(cfg->host);
> +	}
> +
> +	return count;
> +}
> +
> +/**
> + * cxlflash_show_dev_mode() - presents the current mode of the device
> + * @dev:	Generic device associated with the device.
> + * @attr:	Device attribute representing the device mode.
> + * @buf:	Buffer of length PAGE_SIZE to report back the dev mode in ASCII.
> + *
> + * Return: The size of the ASCII string returned in @buf.
> + */
> +static ssize_t cxlflash_show_dev_mode(struct device *dev,
> +				      struct device_attribute *attr, char *buf)
> +{
> +	struct scsi_device *sdev = to_scsi_device(dev);
> +	void *lun_info = (void *)sdev->hostdata;
> +	char *legacy = "legacy",
> +	     *superpipe = "superpipe";
> +
> +	return snprintf(buf, PAGE_SIZE, "%s\n", lun_info ? superpipe : legacy);

Why bother creating these legacy and superpipe locals at all? Just do:

return snprintf(buf, PAGE_SIZE, "%s\n", lun_info ? "superpipe" : "legacy");

> +}
> +
> +/**
> + * cxlflash_wait_for_pci_err_recovery() - wait for error recovery during probe
> + * @cxlflash:	Internal structure associated with the host.
> + */
> +static void cxlflash_wait_for_pci_err_recovery(struct cxlflash_cfg *cfg)
> +{
> +	struct pci_dev *pdev = cfg->dev;
> +
> +	if (pci_channel_offline(pdev))
> +		wait_event_timeout(cfg->eeh_wait_q,
> +				   !pci_channel_offline(pdev),
> +				   CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT);
> +}
> +
> +/*
> + * Host attributes
> + */
> +static DEVICE_ATTR(port0, S_IRUGO, cxlflash_show_port_status, NULL);
> +static DEVICE_ATTR(port1, S_IRUGO, cxlflash_show_port_status, NULL);
> +static DEVICE_ATTR(lun_mode, S_IRUGO | S_IWUSR, cxlflash_show_lun_mode,
> +		   cxlflash_store_lun_mode);
> +
> +static struct device_attribute *cxlflash_host_attrs[] = {
> +	&dev_attr_port0,
> +	&dev_attr_port1,
> +	&dev_attr_lun_mode,
> +	NULL
> +};
> +
> +/*
> + * Device attributes
> + */
> +static DEVICE_ATTR(mode, S_IRUGO, cxlflash_show_dev_mode, NULL);
> +
> +static struct device_attribute *cxlflash_dev_attrs[] = {
> +	&dev_attr_mode,
> +	NULL
> +};
> +
> +/*
> + * Host template
> + */
> +static struct scsi_host_template driver_template = {
> +	.module = THIS_MODULE,
> +	.name = CXLFLASH_ADAPTER_NAME,
> +	.info = cxlflash_driver_info,
> +	.proc_name = CXLFLASH_NAME,
> +	.queuecommand = cxlflash_queuecommand,
> +	.eh_device_reset_handler = cxlflash_eh_device_reset_handler,
> +	.eh_host_reset_handler = cxlflash_eh_host_reset_handler,
> +	.change_queue_depth = cxlflash_change_queue_depth,
> +	.cmd_per_lun = 16,
> +	.can_queue = CXLFLASH_MAX_CMDS,
> +	.this_id = -1,
> +	.sg_tablesize = SG_NONE,	/* No scatter gather support. */
> +	.max_sectors = CXLFLASH_MAX_SECTORS,
> +	.use_clustering = ENABLE_CLUSTERING,
> +	.shost_attrs = cxlflash_host_attrs,
> +	.sdev_attrs = cxlflash_dev_attrs,
> +};
> +
> +/*
> + * Device dependent values
> + */
> +static struct dev_dependent_vals dev_corsa_vals = { CXLFLASH_MAX_SECTORS };
> +
> +/*
> + * PCI device binding table
> + */
> +static struct pci_device_id cxlflash_pci_table[] = {
> +	{PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CORSA,
> +	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_corsa_vals},
> +	{}
> +};
> +
> +MODULE_DEVICE_TABLE(pci, cxlflash_pci_table);
> +
> +/**
> + * free_mem() - free memory associated with the AFU
> + * @cxlflash:	Internal structure associated with the host.
> + *
> + * As part of draining the AFU command pool, the timers of each
> + * command are ensured to be stopped.
> + */
> +static void free_mem(struct cxlflash_cfg *cfg)
> +{
> +	int i;
> +	char *buf = NULL;
> +	struct afu *afu = cfg->afu;
> +
> +	if (cfg->afu) {
> +		for (i = 0; i < CXLFLASH_NUM_CMDS; i++) {
> +			buf = afu->cmd[i].buf;
> +			if (!((u64)buf & (PAGE_SIZE - 1)))
> +				free_page((unsigned long)buf);
> +		}
> +
> +		free_pages((unsigned long)afu, get_order(sizeof(struct afu)));
> +		cfg->afu = NULL;
> +	}
> +}
> +
> +/**
> + * stop_afu() - stops the AFU command timers and unmaps the MMIO space
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Tolerates an AFU that is missing or only partially set up: a NULL AFU
> + * returns immediately, timers without a handler are skipped, and the MMIO
> + * space is only unmapped when it is currently mapped.
> + */
> +static void stop_afu(struct cxlflash_cfg *cfg)
> +{
> +	struct afu *afu = cfg->afu;
> +	int idx;
> +
> +	if (!afu) {
> +		pr_debug("%s: returning because afu is NULL\n", __func__);
> +		return;
> +	}
> +
> +	/* Timers must be quiesced before the mapping goes away */
> +	for (idx = 0; idx < CXLFLASH_NUM_CMDS; idx++) {
> +		struct timer_list *timer = &afu->cmd[idx].timer;
> +
> +		if (timer->function)
> +			del_timer_sync(timer);
> +	}
> +
> +	if (!afu->afu_map)
> +		return;
> +
> +	cxl_psa_unmap((void *)afu->afu_map);
> +	afu->afu_map = NULL;
> +}
> +
> +/**
> + * term_mc() - terminates the master context
> + * @cfg:	Internal structure associated with the host.
> + * @level:	Depth of allocation, where to begin waterfall tear down.
> + *
> + * Safe to call with AFU/MC in partially allocated/initialized state: when
> + * either the AFU or the master context is NULL an error is logged and
> + * nothing is torn down.
> + *
> + * Every case of the switch deliberately falls through to the next, so the
> + * tear down starts at @level and runs every later step, ending with the
> + * master context pointer in @cfg being cleared.
> + */
> +static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level)
> +{
> +	int rc = 0;
> +	struct afu *afu = cfg->afu;
> +
> +	if (!afu || !cfg->mcctx) {
> +		pr_err("%s: returning from term_mc with NULL afu or MC\n",
> +		       __func__);
> +		return;
> +	}
> +
> +	switch (level) {
> +	case UNDO_START:
> +		rc = cxl_stop_context(cfg->mcctx);
> +		/* A context that cannot be stopped is treated as fatal */
> +		BUG_ON(rc);
> +		/* Fall through */
> +	case UNMAP_THREE:
> +		pr_debug("%s: before unmap 3\n", __func__);
> +		cxl_unmap_afu_irq(cfg->mcctx, 3, afu);
> +		/* Fall through */
> +	case UNMAP_TWO:
> +		pr_debug("%s: before unmap 2\n", __func__);
> +		cxl_unmap_afu_irq(cfg->mcctx, 2, afu);
> +		/* Fall through */
> +	case UNMAP_ONE:
> +		pr_debug("%s: before unmap 1\n", __func__);
> +		cxl_unmap_afu_irq(cfg->mcctx, 1, afu);
> +		/* Fall through */
> +	case FREE_IRQ:
> +		pr_debug("%s: before cxl_free_afu_irqs\n", __func__);
> +		cxl_free_afu_irqs(cfg->mcctx);
> +		/* Fall through */
> +	case RELEASE_CONTEXT:
> +		/* NOTE(review): only the pointer is dropped here - confirm */
> +		/* that the context itself is released elsewhere */
> +		cfg->mcctx = NULL;
> +	}
> +}
> +
> +/**
> + * term_afu() - terminates the AFU
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Tears down the master context from the top of the waterfall and then,
> + * when an AFU exists, stops it. Safe to call with AFU/MC in partially
> + * allocated/initialized state.
> + */
> +static void term_afu(struct cxlflash_cfg *cfg)
> +{
> +	term_mc(cfg, UNDO_START);
> +
> +	/* Timers are stopped (and MMIO unmapped) only once the MC is gone */
> +	if (cfg->afu != NULL)
> +		stop_afu(cfg);
> +
> +	pr_debug("%s: returning\n", __func__);
> +}
> +
> +/**
> + * cxlflash_remove() - PCI entry point to tear down host
> + * @pdev:	PCI device associated with the host.
> + *
> + * Blocks until cfg->tmf_active is clear, then tears the host down starting
> + * from the initialization state recorded in cfg->init_state. Every case of
> + * the switch deliberately falls through to the next so that all earlier
> + * initialization steps are undone as well.
> + *
> + * Safe to use as a cleanup in partially allocated/initialized state.
> + */
> +static void cxlflash_remove(struct pci_dev *pdev)
> +{
> +	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
> +
> +	dev_dbg(&pdev->dev, "%s: enter cxlflash_remove!\n", __func__);
> +
> +	wait_event(cfg->tmf_wait_q, !cfg->tmf_active);
> +
> +	switch (cfg->init_state) {
> +	case INIT_STATE_SCSI:
> +		scsi_remove_host(cfg->host);
> +		dev_dbg(&pdev->dev, "%s: after scsi_remove_host!\n", __func__);
> +		scsi_host_put(cfg->host);
> +		dev_dbg(&pdev->dev, "%s: after scsi_host_put!\n", __func__);

Would probably be good to scrub the code for some of these debug statements.
Some are fine to leave in the code if useful, but ones like these above should
probably go.

> +		/* Fall through */
> +	case INIT_STATE_PCI:
> +		pci_release_regions(cfg->dev);
> +		pci_disable_device(pdev);
> +		/* Fall through */
> +	case INIT_STATE_AFU:
> +		term_afu(cfg);
> +		dev_dbg(&pdev->dev, "%s: after struct term_afu!\n",
> +			__func__);
> +		/* Fall through */
> +	case INIT_STATE_NONE:
> +		/* Let queued work finish before the AFU memory is freed */
> +		flush_work(&cfg->work_q);
> +		free_mem(cfg);
> +		break;
> +	}
> +
> +	pr_debug("%s: returning\n", __func__);
> +}
> +
> +/**
> + * alloc_mem() - allocates the AFU and its command pool
> + * @cfg:	Internal structure associated with the host.
> + *
> + * The AFU structure is allocated zeroed, then every command is given a
> + * buffer of CMD_BUFSIZE bytes carved out of zeroed pages. When a command
> + * buffer page cannot be obtained, free_mem() releases everything that was
> + * allocated so far, so no partial allocation is left behind.
> + *
> + * Return:
> + *	0 on success
> + *	-ENOMEM on failure to allocate memory
> + */
> +static int alloc_mem(struct cxlflash_cfg *cfg)
> +{
> +	int rc = 0;
> +	int i;
> +	char *buf = NULL;
> +
> +	/* This allocation is about 12K, i.e. only 1 64k page
> +	 * and up to 4 4k pages
> +	 */
> +	cfg->afu = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +					    get_order(sizeof(struct afu)));
> +	if (unlikely(!cfg->afu)) {
> +		pr_err("%s: cannot get %d free pages\n",
> +		       __func__, get_order(sizeof(struct afu)));
> +		rc = -ENOMEM;
> +		goto out;
> +	}
> +	cfg->afu->parent = cfg;
> +	cfg->afu->afu_map = NULL;
> +
> +	/*
> +	 * buf advances by CMD_BUFSIZE for each command. Whenever it sits on a
> +	 * page boundary (which includes the initial NULL) a new page is
> +	 * allocated. This presumably relies on CMD_BUFSIZE dividing PAGE_SIZE
> +	 * evenly so that the end of each page is detected - TODO confirm.
> +	 */
> +	for (i = 0; i < CXLFLASH_NUM_CMDS; buf += CMD_BUFSIZE, i++) {
> +		if (!((u64)buf & (PAGE_SIZE - 1))) {
> +			buf = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
> +			if (unlikely(!buf)) {
> +				pr_err("%s: Allocate command buffers fail!\n",
> +				       __func__);
> +				rc = -ENOMEM;
> +				/* Releases the pages obtained so far and the AFU */
> +				free_mem(cfg);
> +				goto out;
> +			}
> +		}
> +
> +		cfg->afu->cmd[i].buf = buf;
> +		atomic_set(&cfg->afu->cmd[i].free, 1);
> +		cfg->afu->cmd[i].slot = i;
> +		cfg->afu->cmd[i].special = 0;
> +	}
> +
> +out:
> +	return rc;
> +}
> +
> +/**
> + * init_pci() - initializes the host as a PCI device
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Requests the PCI regions, enables the device, sets the DMA mask (64 bit
> + * with a fallback to 32 bit), enables bus mastering and saves the PCI
> + * state. On failure the steps completed so far are undone in reverse
> + * order through the labels at the end of the function.
> + *
> + * Return:
> + *	0 on success
> + *	-EIO on unable to communicate with device
> + *	A return code from the PCI sub-routines
> + */
> +static int init_pci(struct cxlflash_cfg *cfg)
> +{
> +	struct pci_dev *pdev = cfg->dev;
> +	int rc = 0;
> +
> +	cfg->cxlflash_regs_pci = pci_resource_start(pdev, 0);
> +	rc = pci_request_regions(pdev, CXLFLASH_NAME);
> +	if (rc < 0) {
> +		dev_err(&pdev->dev,
> +			"%s: Couldn't register memory range of registers\n",
> +			__func__);
> +		goto out;
> +	}
> +
> +	rc = pci_enable_device(pdev);
> +	if (rc || pci_channel_offline(pdev)) {
> +		/* Channel is offline: wait for recovery, then try once more */
> +		if (pci_channel_offline(pdev)) {
> +			cxlflash_wait_for_pci_err_recovery(cfg);
> +			rc = pci_enable_device(pdev);
> +		}
> +
> +		if (rc) {
> +			dev_err(&pdev->dev, "%s: Cannot enable adapter\n",
> +				__func__);
> +			cxlflash_wait_for_pci_err_recovery(cfg);
> +			goto out_release_regions;
> +		}
> +	}
> +
> +	/* Prefer a 64 bit DMA mask, fall back to 32 bit when refused */
> +	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
> +	if (rc < 0) {
> +		dev_dbg(&pdev->dev, "%s: Failed to set 64 bit PCI DMA mask\n",
> +			__func__);
> +		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
> +	}
> +
> +	if (rc < 0) {
> +		dev_err(&pdev->dev, "%s: Failed to set PCI DMA mask\n",
> +			__func__);
> +		goto out_disable;
> +	}
> +
> +	pci_set_master(pdev);
> +
> +	/* Give an offline channel one chance to recover before giving up */
> +	if (pci_channel_offline(pdev)) {
> +		cxlflash_wait_for_pci_err_recovery(cfg);
> +		if (pci_channel_offline(pdev)) {
> +			rc = -EIO;
> +			goto out_msi_disable;
> +		}
> +	}
> +
> +	rc = pci_save_state(pdev);
> +
> +	if (rc != PCIBIOS_SUCCESSFUL) {
> +		dev_err(&pdev->dev, "%s: Failed to save PCI config space\n",
> +			__func__);
> +		rc = -EIO;
> +		goto cleanup_nolog;
> +	}
> +
> +out:
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +
> +	/* Error unwinding: each label undoes one more step, then exits via out */
> +cleanup_nolog:
> +out_msi_disable:
> +	cxlflash_wait_for_pci_err_recovery(cfg);
> +out_disable:
> +	pci_disable_device(pdev);
> +out_release_regions:
> +	pci_release_regions(pdev);
> +	goto out;
> +
> +}
> +
> +/**
> + * init_scsi() - adds the host to the SCSI stack and kicks off host scan
> + * @cfg:	Internal structure associated with the host.
> + *
> + * The host scan is only started when the host was added successfully.
> + *
> + * Return:
> + *	0 on success
> + *	A return code from adding the host
> + */
> +static int init_scsi(struct cxlflash_cfg *cfg)
> +{
> +	struct pci_dev *pdev = cfg->dev;
> +	int rc;
> +
> +	dev_dbg(&pdev->dev, "%s: before scsi_add_host\n", __func__);
> +	rc = scsi_add_host(cfg->host, &pdev->dev);
> +	if (rc) {
> +		dev_err(&pdev->dev, "%s: scsi_add_host failed (rc=%d)\n",
> +			__func__, rc);
> +	} else {
> +		dev_dbg(&pdev->dev, "%s: before scsi_scan_host\n", __func__);
> +		scsi_scan_host(cfg->host);
> +	}
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * set_port_online() - transitions the specified host FC port to online state
> + * @fc_regs:	Top of MMIO region defined for specified port.
> + *
> + * Performs a read-modify-write of the port's command configuration register
> + * that clears the offline request and sets the online request. The provided
> + * MMIO region must be mapped prior to call. Online state means that the FC
> + * link layer has synced, completed the handshaking process, and is ready
> + * for login to start.
> + */
> +static void set_port_online(volatile u64 *fc_regs)
> +{
> +	volatile u64 *cmdcfg_reg = &fc_regs[FC_MTIP_CMDCONFIG / 8];
> +	u64 val = readq_be(cmdcfg_reg);
> +
> +	/* drop OFF_LINE, raise ON_LINE */
> +	val = (val & (~FC_MTIP_CMDCONFIG_OFFLINE)) | (FC_MTIP_CMDCONFIG_ONLINE);
> +	writeq_be(val, cmdcfg_reg);
> +}
> +
> +/**
> + * set_port_offline() - transitions the specified host FC port to offline state
> + * @fc_regs:	Top of MMIO region defined for specified port.
> + *
> + * Performs a read-modify-write of the port's command configuration register
> + * that clears the online request and sets the offline request. The provided
> + * MMIO region must be mapped prior to call.
> + */
> +static void set_port_offline(volatile u64 *fc_regs)
> +{
> +	volatile u64 *cmdcfg_reg = &fc_regs[FC_MTIP_CMDCONFIG / 8];
> +	u64 val = readq_be(cmdcfg_reg);
> +
> +	/* drop ON_LINE, raise OFF_LINE */
> +	val = (val & (~FC_MTIP_CMDCONFIG_ONLINE)) | (FC_MTIP_CMDCONFIG_OFFLINE);
> +	writeq_be(val, cmdcfg_reg);
> +}
> +
> +/**
> + * wait_port_online() - waits for the specified host FC port come online
> + * @fc_regs:	Top of MMIO region defined for specified port.
> + * @delay_us:	Number of microseconds to delay between reading port status.
> + * @nretry:	Number of cycles to retry reading port status.
> + *
> + * The port status is sampled once after an initial delay and then up to
> + * @nretry more times. The provided MMIO region must be mapped prior to
> + * call. This will timeout when the cable is not plugged in.
> + *
> + * Return:
> + *	TRUE (1) when the specified port is online
> + *	FALSE (0) when the specified port fails to come online after timeout
> + *	-EINVAL when @delay_us is less than 1000
> + */
> +static int wait_port_online(volatile u64 *fc_regs,
> +			    useconds_t delay_us, unsigned int nretry)
> +{
> +	u64 status;
> +
> +	/* The delay is applied in whole milliseconds */
> +	if (delay_us < 1000) {
> +		pr_err("%s: invalid delay specified %d\n", __func__, delay_us);
> +		return -EINVAL;
> +	}
> +
> +	for (;;) {
> +		msleep(delay_us / 1000);
> +		status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]);
> +
> +		if ((status & FC_MTIP_STATUS_MASK) == FC_MTIP_STATUS_ONLINE)
> +			return 1;
> +
> +		/* Retries exhausted without ever seeing the port online */
> +		if (nretry-- == 0)
> +			return 0;
> +	}
> +}
> +
> +/**
> + * wait_port_offline() - waits for the specified host FC port go offline
> + * @fc_regs:	Top of MMIO region defined for specified port.
> + * @delay_us:	Number of microseconds to delay between reading port status.
> + * @nretry:	Number of cycles to retry reading port status.
> + *
> + * The port status is sampled once after an initial delay and then up to
> + * @nretry more times. The provided MMIO region must be mapped prior to
> + * call.
> + *
> + * Return:
> + *	TRUE (1) when the specified port is offline
> + *	FALSE (0) when the specified port fails to go offline after timeout
> + *	-EINVAL when @delay_us is less than 1000
> + */
> +static int wait_port_offline(volatile u64 *fc_regs,
> +			     useconds_t delay_us, unsigned int nretry)
> +{
> +	u64 status;
> +
> +	/* The delay is applied in whole milliseconds */
> +	if (delay_us < 1000) {
> +		pr_err("%s: invalid delay specified %d\n", __func__, delay_us);
> +		return -EINVAL;
> +	}
> +
> +	for (;;) {
> +		msleep(delay_us / 1000);
> +		status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]);
> +
> +		if ((status & FC_MTIP_STATUS_MASK) == FC_MTIP_STATUS_OFFLINE)
> +			return 1;
> +
> +		/* Retries exhausted without ever seeing the port offline */
> +		if (nretry-- == 0)
> +			return 0;
> +	}
> +}
> +
> +/**
> + * afu_set_wwpn() - configures the WWPN for the specified host FC port
> + * @afu:	AFU associated with the host that owns the specified FC port.
> + * @port:	Port number being configured.
> + * @fc_regs:	Top of MMIO region defined for specified port.
> + * @wwpn:	The world-wide-port-number previously discovered for port.
> + *
> + * The provided MMIO region must be mapped prior to call. The port is taken
> + * offline, the WWPN is written (only when the port actually went offline)
> + * and the port is brought back online regardless. This toggling can cause
> + * this routine to delay up to a few seconds. When configured to use the
> + * internal LUN feature of the AFU, a failure to come online is overridden.
> + *
> + * Return:
> + *	0 when the WWPN is successfully written and the port comes back online
> + *	-1 when the port fails to go offline or come back up online
> + */
> +static int afu_set_wwpn(struct afu *afu, int port,
> +			volatile u64 *fc_regs, u64 wwpn)
> +{
> +	int rc = 0;
> +
> +	set_port_offline(fc_regs);
> +
> +	if (wait_port_offline(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
> +			      FC_PORT_STATUS_RETRY_CNT)) {
> +		writeq_be(wwpn, &fc_regs[FC_PNAME / 8]);
> +	} else {
> +		pr_debug("%s: wait on port %d to go offline timed out\n",
> +			 __func__, port);
> +		/* Remember the failure, the port is still put back online */
> +		rc = -1;
> +	}
> +
> +	set_port_online(fc_regs);
> +
> +	if (!wait_port_online(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
> +			      FC_PORT_STATUS_RETRY_CNT)) {
> +		pr_debug("%s: wait on port %d to go online timed out\n",
> +			 __func__, port);
> +
> +		if (afu->internal_lun) {
> +			/* The internal lun does not depend on the link */
> +			pr_debug("%s: Overriding port %d online timeout!!!\n",
> +				 __func__, port);
> +			rc = 0;
> +		} else {
> +			rc = -1;
> +		}
> +	}
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +
> +	return rc;
> +}
> +
> +/**
> + * afu_link_reset() - resets the specified host FC port
> + * @afu:	AFU associated with the host that owns the specified FC port.
> + * @port:	Port number being configured.
> + * @fc_regs:	Top of MMIO region defined for specified port.
> + *
> + * The provided MMIO region must be mapped prior to call. The sequence to
> + * reset the port involves toggling it offline and then back online. This
> + * action can cause this routine to delay up to a few seconds. An effort
> + * is made to maintain link with the device by removing the port from the
> + * AFU port selection while the reset takes place and adding it back
> + * afterwards. A timeout in either direction is logged but does not stop
> + * the sequence.
> + */
> +static void afu_link_reset(struct afu *afu, int port, volatile u64 *fc_regs)
> +{
> +	u64 port_sel;
> +
> +	/* first switch the AFU to the other links, if any */
> +	port_sel = readq_be(&afu->afu_map->global.regs.afu_port_sel);
> +	port_sel &= ~(1 << port);
> +	writeq_be(port_sel, &afu->afu_map->global.regs.afu_port_sel);
> +	cxlflash_afu_sync(afu, 0, 0, AFU_GSYNC);
> +
> +	set_port_offline(fc_regs);
> +	if (!wait_port_offline(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
> +			       FC_PORT_STATUS_RETRY_CNT))
> +		pr_err("%s: wait on port %d to go offline timed out\n",
> +		       __func__, port);
> +
> +	set_port_online(fc_regs);
> +	if (!wait_port_online(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
> +			      FC_PORT_STATUS_RETRY_CNT))
> +		pr_err("%s: wait on port %d to go online timed out\n",
> +		       __func__, port);
> +
> +	/* switch back to include this port */
> +	port_sel |= (1 << port);
> +	writeq_be(port_sel, &afu->afu_map->global.regs.afu_port_sel);
> +	cxlflash_afu_sync(afu, 0, 0, AFU_GSYNC);
> +
> +	pr_debug("%s: returning port_sel=%lld\n", __func__, port_sel);
> +}
> +
> +/*
> + * Asynchronous interrupt information table
> + *
> + * Each entry holds, in order: the asynchronous status bit it describes,
> + * the text logged for it, the FC port it belongs to and the actions to
> + * take (none, CLR_FC_ERROR and/or LINK_RESET). The table is searched by
> + * status bit and ends with an entry whose status is zero.
> + */
> +static const struct asyc_intr_info ainfo[] = {
> +	{SISL_ASTATUS_FC0_OTHER, "fc 0: other error", 0,
> +		CLR_FC_ERROR | LINK_RESET},
> +	{SISL_ASTATUS_FC0_LOGO, "fc 0: target initiated LOGO", 0, 0},
> +	{SISL_ASTATUS_FC0_CRC_T, "fc 0: CRC threshold exceeded", 0, LINK_RESET},
> +	{SISL_ASTATUS_FC0_LOGI_R, "fc 0: login timed out, retrying", 0, 0},
> +	{SISL_ASTATUS_FC0_LOGI_F, "fc 0: login failed", 0, CLR_FC_ERROR},
> +	{SISL_ASTATUS_FC0_LOGI_S, "fc 0: login succeeded", 0, 0},
> +	{SISL_ASTATUS_FC0_LINK_DN, "fc 0: link down", 0, 0},
> +	{SISL_ASTATUS_FC0_LINK_UP, "fc 0: link up", 0, 0},

Does "fc 0" here mean "port 0"?

> +
> +	{SISL_ASTATUS_FC1_OTHER, "fc 1: other error", 1,
> +	 CLR_FC_ERROR | LINK_RESET},
> +	{SISL_ASTATUS_FC1_LOGO, "fc 1: target initiated LOGO", 1, 0},
> +	{SISL_ASTATUS_FC1_CRC_T, "fc 1: CRC threshold exceeded", 1, LINK_RESET},
> +	{SISL_ASTATUS_FC1_LOGI_R, "fc 1: login timed out, retrying", 1, 0},
> +	{SISL_ASTATUS_FC1_LOGI_F, "fc 1: login failed", 1, CLR_FC_ERROR},
> +	{SISL_ASTATUS_FC1_LOGI_S, "fc 1: login succeeded", 1, 0},
> +	{SISL_ASTATUS_FC1_LINK_DN, "fc 1: link down", 1, 0},
> +	{SISL_ASTATUS_FC1_LINK_UP, "fc 1: link up", 1, 0},
> +	{0x0, "", 0, 0}		/* terminator */
> +};
> +
> +/**
> + * find_ainfo() - locates and returns asynchronous interrupt information
> + * @status:	Status code set by AFU on error.
> + *
> + * Walks the asynchronous interrupt information table up to its zero
> + * status terminator looking for an exact match of @status.
> + *
> + * Return: The located information or NULL when the status code is invalid.
> + */
> +static const struct asyc_intr_info *find_ainfo(u64 status)
> +{
> +	const struct asyc_intr_info *entry = ainfo;
> +
> +	while (entry->status) {
> +		if (entry->status == status)
> +			return entry;
> +		entry++;
> +	}
> +
> +	return NULL;
> +}
> +
> +/**
> + * afu_err_intr_init() - clears and initializes the AFU for error interrupts
> + * @afu:	AFU associated with the host.
> + *
> + * Asynchronous interrupts are masked, routed to the master context,
> + * cleared and then unmasked for the bits of interest. The internal lun
> + * configuration of port 0 and the FC error registers of every port are
> + * reset, and finally the synchronous error interrupt is set up.
> + */
> +static void afu_err_intr_init(struct afu *afu)
> +{
> +	int i;
> +	volatile u64 reg;
> +
> +	/* global async interrupts: AFU clears afu_ctrl on context exit
> +	 * if async interrupts were sent to that context. This prevents
> +	 * the AFU from sending further async interrupts when there is
> +	 * nobody to receive them.
> +	 */
> +
> +	/* mask all */
> +	writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_mask);
> +	/* set LISN# to send and point to master context */
> +	reg = ((u64) (((afu->ctx_hndl << 8) | SISL_MSI_ASYNC_ERROR)) << 40);
> +
> +	if (afu->internal_lun)
> +		reg |= 1;	/* Bit 63 indicates local lun */
> +	writeq_be(reg, &afu->afu_map->global.regs.afu_ctrl);
> +	/* clear all */
> +	writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_clear);
> +	/* unmask bits that are of interest */
> +	/* note: afu can send an interrupt after this step */
> +	writeq_be(SISL_ASTATUS_MASK, &afu->afu_map->global.regs.aintr_mask);
> +	/* clear again in case a bit came on after previous clear but before */
> +	/* unmask */
> +	writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_clear);
> +
> +	/* Clear/Set internal lun bits */
> +	reg = readq_be(&afu->afu_map->global.fc_regs[0][FC_CONFIG2 / 8]);
> +	pr_debug("%s: ilun p0 = %016llX\n", __func__, reg);
> +	reg &= SISL_FC_INTERNAL_MASK;
> +	if (afu->internal_lun)
> +		reg |= ((u64)(afu->internal_lun - 1) << SISL_FC_INTERNAL_SHIFT);
> +	pr_debug("%s: ilun p0 = %016llX\n", __func__, reg);
> +	writeq_be(reg, &afu->afu_map->global.fc_regs[0][FC_CONFIG2 / 8]);
> +
> +	/* now clear FC errors */
> +	for (i = 0; i < NUM_FC_PORTS; i++) {
> +		writeq_be(0xFFFFFFFFU,
> +			  &afu->afu_map->global.fc_regs[i][FC_ERROR / 8]);
> +		writeq_be(0, &afu->afu_map->global.fc_regs[i][FC_ERRCAP / 8]);
> +	}
> +
> +	/* sync interrupts for master's IOARRIN write */
> +	/* note that unlike asyncs, there can be no pending sync interrupts */
> +	/* at this time (this is a fresh context and master has not written */
> +	/* IOARRIN yet), so there is nothing to clear. */
> +
> +	/* set LISN#, it is always sent to the context that wrote IOARRIN */
> +	writeq_be(SISL_MSI_SYNC_ERROR, &afu->host_map->ctx_ctrl);
> +	writeq_be(SISL_ISTATUS_MASK, &afu->host_map->intr_mask);
> +}
> +
> +/**
> + * cxlflash_sync_err_irq() - interrupt handler for synchronous errors
> + * @irq:	Interrupt number.
> + * @data:	Private data provided at interrupt registration, the AFU.
> + *
> + * Logs the interrupt status and, unless the interrupt turns out to be
> + * spurious, clears the unmasked status bits that were raised.
> + *
> + * Return: Always return IRQ_HANDLED.
> + */
> +static irqreturn_t cxlflash_sync_err_irq(int irq, void *data)
> +{
> +	struct afu *afu = (struct afu *)data;
> +	u64 status = readq_be(&afu->host_map->intr_status);
> +	u64 pending = (status & SISL_ISTATUS_UNMASK);
> +
> +	if (pending != 0UL) {
> +		pr_err("%s: %llX: unexpected interrupt, intr_status %016llX\n",
> +		       __func__, (u64)afu, status);
> +
> +		writeq_be(pending, &afu->host_map->intr_clear);
> +	} else {
> +		/* Nothing of interest is raised, there is nothing to clear */
> +		pr_err("%s: %llX: spurious interrupt, intr_status %016llX\n",
> +		       __func__, (u64)afu, status);
> +	}
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, IRQ_HANDLED);
> +	return IRQ_HANDLED;
> +}
> +
> +/**
> + * cxlflash_rrq_irq() - interrupt handler for read-response queue (normal path)
> + * @irq:	Interrupt number.
> + * @data:	Private data provided at interrupt registration, the AFU.
> + *
> + * Consumes entries from the current RRQ position for as long as their
> + * toggle bit matches the expected toggle value, completing the command
> + * referenced by each entry. The updated position and toggle value are
> + * stored back in the AFU for the next interrupt.
> + *
> + * Return: Always return IRQ_HANDLED.
> + */
> +static irqreturn_t cxlflash_rrq_irq(int irq, void *data)
> +{
> +	struct afu *afu = (struct afu *)data;
> +	struct afu_cmd *cmd;
> +	bool toggle = afu->toggle;
> +	u64 entry;
> +	u64 *hrrq_start = afu->hrrq_start,
> +	    *hrrq_end = afu->hrrq_end;
> +	volatile u64 *hrrq_curr = afu->hrrq_curr;
> +
> +	/* Process however many RRQ entries that are ready */
> +	while (true) {
> +		entry = *hrrq_curr;
> +
> +		/* A mismatching toggle bit marks an entry that is not ready */
> +		if ((entry & SISL_RESP_HANDLE_T_BIT) != toggle)
> +			break;
> +
> +		/* With the toggle bit removed the entry is the command pointer */
> +		cmd = (struct afu_cmd *)(entry & ~SISL_RESP_HANDLE_T_BIT);
> +		cmd_complete(cmd);
> +
> +		/* Advance to next entry or wrap and flip the toggle bit */
> +		if (hrrq_curr < hrrq_end)
> +			hrrq_curr++;
> +		else {
> +			hrrq_curr = hrrq_start;
> +			toggle ^= SISL_RESP_HANDLE_T_BIT;
> +		}
> +	}
> +
> +	afu->hrrq_curr = hrrq_curr;
> +	afu->toggle = toggle;
> +
> +	return IRQ_HANDLED;
> +}
> +
> +/**
> + * cxlflash_async_err_irq() - interrupt handler for asynchronous errors
> + * @irq:	Interrupt number.
> + * @data:	Private data provided at interrupt registration, the AFU.
> + *
> + * Reads and clears the unmasked asynchronous status bits, then walks every
> + * bit that is set. Each set bit that has an entry in the asynchronous
> + * interrupt information table is logged and the actions listed for it are
> + * carried out: a link reset is handed to the work queue and/or the FC
> + * error registers of the affected port are cleared. Bits that are not set,
> + * or that have no table entry, are skipped.
> + *
> + * Return: Always return IRQ_HANDLED.
> + */
> +static irqreturn_t cxlflash_async_err_irq(int irq, void *data)
> +{
> +	struct afu *afu = (struct afu *)data;
> +	struct cxlflash_cfg *cfg;
> +	u64 reg_unmasked;
> +	const struct asyc_intr_info *info;
> +	volatile struct sisl_global_map *global = &afu->afu_map->global;

Does this need to be volatile? 

> +	u64 reg;
> +	int i;
> +
> +	cfg = afu->parent;
> +
> +	reg = readq_be(&global->regs.aintr_status);
> +	reg_unmasked = (reg & SISL_ASTATUS_UNMASK);
> +
> +	if (reg_unmasked == 0) {
> +		pr_err("%s: spurious interrupt, aintr_status 0x%016llX\n",
> +		       __func__, reg);
> +		goto out;
> +	}
> +
> +	/* it is OK to clear AFU status before FC_ERROR */
> +	writeq_be(reg_unmasked, &global->regs.aintr_clear);
> +
> +	/* check each bit that is on */
> +	for (i = 0; reg_unmasked; i++, reg_unmasked = (reg_unmasked >> 1)) {
> +		/* bit i was not raised, there is nothing to report for it */
> +		if (!(reg_unmasked & 0x1))
> +			continue;
> +
> +		info = find_ainfo(1ULL << i);
> +		if (!info)
> +			continue;
> +
> +		pr_err("%s: %s, fc_status 0x%08llX\n", __func__, info->desc,
> +		       readq_be(&global->fc_regs[info->port][FC_STATUS / 8]));
> +
> +		/*
> +		 * do link reset first, some OTHER errors will set FC_ERROR
> +		 * again if cleared before or w/o a reset
> +		 */
> +		if (info->action & LINK_RESET) {
> +			pr_err("%s: fc %d: resetting link\n",
> +			       __func__, info->port);
> +			cfg->lr_state = LINK_RESET_REQUIRED;
> +			cfg->lr_port = info->port;
> +			schedule_work(&cfg->work_q);
> +		}
> +
> +		if (info->action & CLR_FC_ERROR) {
> +			reg = readq_be(&global->fc_regs[info->port]
> +				       [FC_ERROR / 8]);
> +
> +			/*
> +			 * since all errors are unmasked, FC_ERROR and FC_ERRCAP
> +			 * should be the same and tracing one is sufficient.
> +			 */
> +
> +			pr_err("%s: fc %d: clearing fc_error 0x%08llX\n",
> +			       __func__, info->port, reg);
> +
> +			writeq_be(reg,
> +				  &global->fc_regs[info->port][FC_ERROR / 8]);
> +			writeq_be(0,
> +				  &global->fc_regs[info->port][FC_ERRCAP / 8]);
> +		}
> +	}
> +
> +out:
> +	pr_debug("%s: returning rc=%d, afu=%p\n", __func__, IRQ_HANDLED, afu);
> +	return IRQ_HANDLED;
> +}
> +
> +/**
> + * start_context() - starts the master context
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Starts the master context held in @cfg using the work element
> + * descriptor of the AFU.
> + *
> + * Return: A success or failure value from CXL services.
> + */
> +static int start_context(struct cxlflash_cfg *cfg)
> +{
> +	u64 wed = cfg->afu->work.work_element_descriptor;
> +	int rc = cxl_start_context(cfg->mcctx, wed, NULL);
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * read_vpd() - obtains the WWPNs from VPD
> + * @cfg:	Internal structure associated with the host.
> + * @wwpn:	Array of size NUM_FC_PORTS to pass back WWPNs
> + *
> + * Reads the VPD of the parent device, locates the read-only section and
> + * converts the per port WWPN keyword found there from its hexadecimal
> + * ASCII form into an integer.
> + *
> + * Return:
> + *	0 on success
> + *	-ENODEV when VPD or WWPN keywords not found
> + */
> +static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
> +{
> +	struct pci_dev *dev = cfg->parent_dev;
> +	int rc = 0;
> +	int ro_start, ro_size, i, j, k;
> +	ssize_t vpd_size;
> +	char vpd_data[CXLFLASH_VPD_LEN];
> +	char tmp_buf[WWPN_BUF_LEN] = { 0 };
> +	char *wwpn_vpd_tags[NUM_FC_PORTS] = { "V5", "V6" };
> +
> +	/* Get the VPD data from the device */
> +	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
> +	if (unlikely(vpd_size <= 0)) {
> +		pr_err("%s: Unable to read VPD (size = %zd)\n",
> +		       __func__, vpd_size);
> +		rc = -ENODEV;
> +		goto out;
> +	}
> +
> +	/* Get the read only section offset */
> +	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size,
> +				    PCI_VPD_LRDT_RO_DATA);
> +	if (unlikely(ro_start < 0)) {
> +		pr_err("%s: VPD Read-only data not found\n", __func__);
> +		rc = -ENODEV;
> +		goto out;
> +	}
> +
> +	/* Get the read only section size, cap when extends beyond read VPD */
> +	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
> +	j = ro_size;
> +	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
> +	if (unlikely((i + j) > vpd_size)) {
> +		pr_debug("%s: Might need to read more VPD (%d > %zd)\n",
> +			 __func__, (i + j), vpd_size);
> +		ro_size = vpd_size - i;
> +	}
> +
> +	/*
> +	 * Find the offset of the WWPN tag within the read only
> +	 * VPD data and validate the found field (partials are
> +	 * no good to us). Convert the ASCII data to an integer
> +	 * value. Note that we must copy to a temporary buffer
> +	 * because the conversion service requires that the ASCII
> +	 * string be terminated.
> +	 */
> +	for (k = 0; k < NUM_FC_PORTS; k++) {
> +		j = ro_size;
> +		i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
> +
> +		i = pci_vpd_find_info_keyword(vpd_data, i, j, wwpn_vpd_tags[k]);
> +		if (unlikely(i < 0)) {
> +			pr_err("%s: Port %d WWPN not found in VPD\n",
> +			       __func__, k);
> +			rc = -ENODEV;
> +			goto out;
> +		}
> +
> +		j = pci_vpd_info_field_size(&vpd_data[i]);
> +		i += PCI_VPD_INFO_FLD_HDR_SIZE;
> +		if (unlikely((i + j > vpd_size) || (j != WWPN_LEN))) {
> +			pr_err("%s: Port %d WWPN incomplete or VPD corrupt\n",
> +			       __func__, k);
> +			rc = -ENODEV;
> +			goto out;
> +		}
> +
> +		/*
> +		 * tmp_buf was zeroed at declaration, so it stays terminated
> +		 * after the copy provided WWPN_BUF_LEN exceeds WWPN_LEN
> +		 * (presumably the case - TODO confirm).
> +		 */
> +		memcpy(tmp_buf, &vpd_data[i], WWPN_LEN);
> +
> +		/*
> +		 * The second argument is the numeric base, not a length: the
> +		 * WWPN is hexadecimal text. Converting straight into the u64
> +		 * also avoids casting the destination to unsigned long.
> +		 */
> +		rc = kstrtoull(tmp_buf, 16, &wwpn[k]);
> +		if (unlikely(rc)) {
> +			pr_err("%s: Fail to convert port %d WWPN to integer\n",
> +			       __func__, k);
> +			rc = -ENODEV;
> +			goto out;
> +		}
> +	}
> +
> +out:
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_context_reset() - timeout handler for AFU commands
> + * @cmd:	AFU command that timed out.
> + *
> + * Completes the timed out command and then sends a reset to the AFU by
> + * writing the reset value to IOARRIN. When the AFU has no command room,
> + * room is polled for a bounded number of retries first; when none becomes
> + * available the reset is not sent and an error is logged. After the write
> + * IOARRIN is polled, with a delay that doubles on every retry, until the
> + * value changes or the retries run out.
> + */
> +void cxlflash_context_reset(struct afu_cmd *cmd)
> +{
> +	int nretry = 0;
> +	u64 rrin = 0x1;
> +	struct afu *afu = cmd->parent;
> +
> +	pr_debug("%s: cmd=%p\n", __func__, cmd);
> +
> +	/* First process completion of the command that timed out */
> +	cmd_complete(cmd);
> +
> +	if (afu->room == 0) {
> +		do {
> +			/*
> +			 * We really want to send this reset at all costs, so
> +			 * spread out wait time on successive retries.
> +			 */
> +			udelay(nretry);
> +			afu->room = readq_be(&afu->host_map->cmd_room);
> +		} while ((afu->room == 0) && (nretry++ < MC_ROOM_RETRY_CNT));
> +	}
> +
> +	if (!afu->room) {
> +		pr_err("%s: no cmd_room to send reset\n", __func__);
> +		return;
> +	}
> +
> +	/* The wait for the reset gets its own, full retry budget */
> +	nretry = 0;
> +	writeq_be((u64) rrin, &afu->host_map->ioarrin);
> +	do {
> +		rrin = readq_be(&afu->host_map->ioarrin);
> +		if (rrin != 0x1)
> +			break;
> +
> +		/*
> +		 * Double delay each time: 1, 2, 4, ... microseconds.
> +		 * NOTE(review): assumes MC_ROOM_RETRY_CNT is small enough
> +		 * for the shift to stay within an int - confirm.
> +		 */
> +		udelay(1 << nretry);
> +	} while (nretry++ < MC_ROOM_RETRY_CNT);
> +}
> +
> +/**
> + * init_pcr() - initialize the provisioning and control registers
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Zeroes the control registers of every context, records the context
> + * handle of the master context together with pointers to its host and
> + * control register maps, programs the endian control of the master
> + * context and initializes AFU command fields that never change.
> + */
> +void init_pcr(struct cxlflash_cfg *cfg)
> +{
> +	struct afu *afu = cfg->afu;
> +	volatile struct sisl_ctrl_map *ctrl_map;
> +	int i;
> +
> +	for (i = 0; i < MAX_CONTEXT; i++) {
> +		ctrl_map = &afu->afu_map->ctrls[i].ctrl;
> +		/* disrupt any clients that could be running */
> +		/* e. g. clients that survived a master restart */
> +		writeq_be(0, &ctrl_map->rht_start);
> +		writeq_be(0, &ctrl_map->rht_cnt_id);
> +		writeq_be(0, &ctrl_map->ctx_cap);
> +	}
> +
> +	/* copy frequently used fields into afu */
> +	afu->ctx_hndl = (u16) cxl_process_element(cfg->mcctx);
> +	/* ctx_hndl is 16 bits in CAIA */
> +	afu->host_map = &afu->afu_map->hosts[afu->ctx_hndl].host;
> +	afu->ctrl_map = &afu->afu_map->ctrls[afu->ctx_hndl].ctrl;
> +
> +	/* Program the Endian Control for the master context */
> +	writeq_be((u64) SISL_ENDIAN_CTRL, &afu->host_map->endian_ctrl);
> +
> +	/* initialize cmd fields that never change */
> +	for (i = 0; i < CXLFLASH_NUM_CMDS; i++) {
> +		afu->cmd[i].rcb.ctx_id = afu->ctx_hndl;
> +		afu->cmd[i].rcb.msi = SISL_MSI_RRQ_UPDATED;
> +		afu->cmd[i].rcb.rrq = 0x0;
> +	}
> +
> +}
> +
> +/**
> + * init_global() - initialize AFU global registers
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Reads the WWPNs from VPD, programs the RRQ bounds, the AFU configuration
> + * and the port selection, sets up each port in use (error mask, CRC
> + * threshold, WWPN), grants the master context its capabilities and takes
> + * an initial heartbeat reading.
> + *
> + * Return:
> + *	0 on success
> + *	The failure value from read_vpd() when the WWPNs cannot be obtained
> + *	-EIO when a WWPN cannot be set on a port
> + */
> +int init_global(struct cxlflash_cfg *cfg)
> +{
> +	struct afu *afu = cfg->afu;
> +	u64 wwpn[NUM_FC_PORTS];	/* wwpn of AFU ports */
> +	int i = 0, num_ports = 0;
> +	int rc = 0;
> +	u64 reg;
> +
> +	rc = read_vpd(cfg, &wwpn[0]);
> +	if (rc) {
> +		pr_err("%s: could not read vpd rc=%d\n", __func__, rc);
> +		goto out;
> +	}
> +
> +	pr_debug("%s: wwpn0=0x%llX wwpn1=0x%llX\n", __func__, wwpn[0], wwpn[1]);
> +
> +	/* set up RRQ in AFU for master issued cmds */
> +	writeq_be((u64) afu->hrrq_start, &afu->host_map->rrq_start);
> +	writeq_be((u64) afu->hrrq_end, &afu->host_map->rrq_end);
> +
> +	/* AFU configuration */
> +	reg = readq_be(&afu->afu_map->global.regs.afu_config);
> +	reg |= SISL_AFUCONF_AR_ALL|SISL_AFUCONF_ENDIAN;
> +	/* enable all auto retry options and control endianness */
> +	/* leave others at default: */
> +	/* CTX_CAP write protected, mbox_r does not clear on read and */
> +	/* checker on if dual afu */
> +	writeq_be(reg, &afu->afu_map->global.regs.afu_config);
> +
> +	/* global port select: select either port */
> +	if (afu->internal_lun) {
> +		/* only use port 0 */
> +		writeq_be(PORT0, &afu->afu_map->global.regs.afu_port_sel);
> +		num_ports = NUM_FC_PORTS - 1;
> +	} else {
> +		writeq_be(BOTH_PORTS, &afu->afu_map->global.regs.afu_port_sel);
> +		num_ports = NUM_FC_PORTS;
> +	}
> +
> +	for (i = 0; i < num_ports; i++) {
> +		/* unmask all errors (but they are still masked at AFU) */
> +		writeq_be(0, &afu->afu_map->global.fc_regs[i][FC_ERRMSK / 8]);
> +		/* clear CRC error cnt & set a threshold */
> +		(void)readq_be(&afu->afu_map->global.
> +			       fc_regs[i][FC_CNT_CRCERR / 8]);
> +		writeq_be(MC_CRC_THRESH, &afu->afu_map->global.fc_regs[i]
> +			  [FC_CRC_THRESH / 8]);
> +
> +		/* set WWPNs. If already programmed, wwpn[i] is 0 */
> +		if (wwpn[i] != 0 &&
> +		    afu_set_wwpn(afu, i,
> +				 &afu->afu_map->global.fc_regs[i][0],
> +				 wwpn[i])) {
> +			pr_debug("%s: failed to set WWPN on port %d\n",
> +				 __func__, i);
> +			rc = -EIO;
> +			goto out;
> +		}
> +		/* Programming WWPN back to back causes additional
> +		 * offline/online transitions and a PLOGI
> +		 */
> +		msleep(100);
> +
> +	}
> +
> +	/* set up master's own CTX_CAP to allow real mode, host translation */
> +	/* tbls, afu cmds and read/write GSCSI cmds. */
> +	/* First, unlock ctx_cap write by reading mbox */
> +	(void)readq_be(&afu->ctrl_map->mbox_r);	/* unlock ctx_cap */
> +	writeq_be((SISL_CTX_CAP_REAL_MODE | SISL_CTX_CAP_HOST_XLATE |
> +		   SISL_CTX_CAP_READ_CMD | SISL_CTX_CAP_WRITE_CMD |
> +		   SISL_CTX_CAP_AFU_CMD | SISL_CTX_CAP_GSCSI_CMD),
> +		  &afu->ctrl_map->ctx_cap);
> +	/* init heartbeat */
> +	afu->hb = readq_be(&afu->afu_map->global.regs.afu_hb);
> +
> +out:
> +	return rc;
> +}
> +
> +/**
> + * start_afu() - initializes and starts the AFU
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Prepares the timer, lock and parent pointer of every AFU command,
> + * initializes the provisioning and control registers, resets the RRQ
> + * bookkeeping and finally initializes the AFU global registers.
> + *
> + * Return: The value returned by init_global().
> + */
> +static int start_afu(struct cxlflash_cfg *cfg)
> +{
> +	struct afu *afu = cfg->afu;
> +	struct afu_cmd *cmd;
> +	int idx;
> +	int rc;
> +
> +	for (idx = 0; idx < CXLFLASH_NUM_CMDS; idx++) {
> +		cmd = &afu->cmd[idx];
> +
> +		/* A command that times out triggers a context reset */
> +		init_timer(&cmd->timer);
> +		cmd->timer.data = (unsigned long)cmd;
> +		cmd->timer.function = (void (*)(unsigned long))
> +		    cxlflash_context_reset;
> +
> +		spin_lock_init(&cmd->slock);
> +		cmd->parent = afu;
> +	}
> +	init_pcr(cfg);
> +
> +	/* RRQ starts at its first entry with the toggle value set */
> +	afu->hrrq_start = &afu->rrq_entry[0];
> +	afu->hrrq_end = &afu->rrq_entry[NUM_RRQ_ENTRY - 1];
> +	afu->hrrq_curr = afu->hrrq_start;
> +	afu->toggle = 1;
> +
> +	rc = init_global(cfg);
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * init_mc() - create and register as the master context
> + * @cfg:	Internal structure associated with the host.
> + *
> + * Obtains a CXL context, marks it as master, resets the AFU, allocates and
> + * maps the three AFU interrupts and finally starts the context. When a step
> + * fails after the context has been obtained, term_mc() is called with the
> + * undo level that corresponds to how far setup had progressed.
> + *
> + * Return:
> + *	0 on success
> + *	-ENOMEM when unable to obtain a context from CXL services
> + *	-EIO when an AFU interrupt could not be mapped
> + *	A failure value from CXL services.
> + */
> +static int init_mc(struct cxlflash_cfg *cfg)
> +{
> +	struct cxl_context *ctx;
> +	struct device *dev = &cfg->dev->dev;
> +	struct afu *afu = cfg->afu;
> +	int rc = 0;
> +	enum undo_level level;
> +
> +	ctx = cxl_get_context(cfg->dev);
> +	if (!ctx)
> +		return -ENOMEM;
> +	cfg->mcctx = ctx;
> +
> +	/* Set it up as a master with the CXL */
> +	cxl_set_master(ctx);
> +
> +	/* During initialization reset the AFU to start from a clean slate */
> +	rc = cxl_afu_reset(cfg->mcctx);
> +	if (rc) {
> +		dev_err(dev, "%s: initial AFU reset failed rc=%d\n",
> +			__func__, rc);
> +		level = RELEASE_CONTEXT;
> +		goto out;
> +	}
> +
> +	rc = cxl_allocate_afu_irqs(ctx, 3);
> +	if (rc) {
> +		dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n",
> +			__func__, rc);
> +		level = RELEASE_CONTEXT;
> +		goto out;
> +	}
> +
> +	/*
> +	 * cxl_map_afu_irq() signals failure with 0 rather than with a
> +	 * negative errno. Convert that into a real error code so the
> +	 * failure is not returned to the caller as success, and treat a
> +	 * negative value as a failure as well.
> +	 */
> +	rc = cxl_map_afu_irq(ctx, 1, cxlflash_sync_err_irq, afu,
> +			     "SISL_MSI_SYNC_ERROR");
> +	if (rc <= 0) {
> +		dev_err(dev, "%s: IRQ 1 (SISL_MSI_SYNC_ERROR) map failed!\n",
> +			__func__);
> +		rc = rc ? rc : -EIO;
> +		level = FREE_IRQ;
> +		goto out;
> +	}
> +
> +	rc = cxl_map_afu_irq(ctx, 2, cxlflash_rrq_irq, afu,
> +			     "SISL_MSI_RRQ_UPDATED");
> +	if (rc <= 0) {
> +		dev_err(dev, "%s: IRQ 2 (SISL_MSI_RRQ_UPDATED) map failed!\n",
> +			__func__);
> +		rc = rc ? rc : -EIO;
> +		level = UNMAP_ONE;
> +		goto out;
> +	}
> +
> +	rc = cxl_map_afu_irq(ctx, 3, cxlflash_async_err_irq, afu,
> +			     "SISL_MSI_ASYNC_ERROR");
> +	if (rc <= 0) {
> +		dev_err(dev, "%s: IRQ 3 (SISL_MSI_ASYNC_ERROR) map failed!\n",
> +			__func__);
> +		rc = rc ? rc : -EIO;
> +		level = UNMAP_TWO;
> +		goto out;
> +	}
> +
> +	/* A successful mapping returns a non-zero value; normalize it */
> +	rc = 0;
> +
> +	/* This performs the equivalent of the CXL_IOCTL_START_WORK.
> +	 * The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process
> +	 * element (pe) that is embedded in the context (ctx)
> +	 */
> +	rc = start_context(cfg);
> +	if (rc) {
> +		dev_err(dev, "%s: start context failed rc=%d\n", __func__, rc);
> +		level = UNMAP_THREE;
> +		goto out;
> +	}
> +ret:
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +out:
> +	/* Unwind whatever was set up before the failing step */
> +	term_mc(cfg, level);
> +	goto ret;
> +}
> +
> +/**
> + * init_afu() - setup as master context and start AFU
> + * @cfg:	Internal structure associated with the host.
> + *
> + * This routine is a higher level of control for configuring the
> + * AFU on probe and reset paths. It establishes the master context, maps the
> + * AFU MMIO space, records the AFU and interface versions, starts the AFU and
> + * then initializes AFU error interrupts.
> + *
> + * Return:
> + *	0 on success
> + *	-ENOMEM when unable to map the AFU MMIO space
> + *	A failure value from internal services.
> + */
> +static int init_afu(struct cxlflash_cfg *cfg)
> +{
> +	u64 reg;
> +	int rc = 0;
> +	struct afu *afu = cfg->afu;
> +	struct device *dev = &cfg->dev->dev;
> +
> +	rc = init_mc(cfg);
> +	if (rc) {
> +		dev_err(dev, "%s: call to init_mc failed, rc=%d!\n",
> +			__func__, rc);
> +		goto err1;
> +	}
> +
> +	/* Map the entire MMIO space of the AFU.
> +	 */
> +	afu->afu_map = cxl_psa_map(cfg->mcctx);
> +	if (!afu->afu_map) {
> +		rc = -ENOMEM;
> +		term_mc(cfg, UNDO_START);
> +		dev_err(dev, "%s: call to cxl_psa_map failed!\n", __func__);
> +		goto err1;
> +	}
> +
> +	/* don't byte reverse on reading afu_version, else the string form */
> +	/*     will be backwards */
> +	/* NOTE(review): afu->version is printed with %s below; confirm the
> +	 * buffer is larger than 8 bytes and NUL terminated.
> +	 */
> +	reg = afu->afu_map->global.regs.afu_version;
> +	memcpy(afu->version, &reg, 8);
> +	afu->interface_version =
> +	    readq_be(&afu->afu_map->global.regs.interface_version);
> +	pr_debug("%s: afu version %s, interface version 0x%llX\n",
> +		 __func__, afu->version, afu->interface_version);
> +
> +	rc = start_afu(cfg);
> +	if (rc) {
> +		dev_err(dev, "%s: call to start_afu failed, rc=%d!\n",
> +			__func__, rc);
> +		term_mc(cfg, UNDO_START);
> +		cxl_psa_unmap((void *)afu->afu_map);
> +		afu->afu_map = NULL;
> +		/* The context is gone and the MMIO mapping was released, so
> +		 * interrupt initialization must not run against this AFU.
> +		 */
> +		goto err1;
> +	}
> +
> +	afu_err_intr_init(cfg->afu);
> +
> +err1:
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_send_cmd() - sends an AFU command
> + * @afu:	AFU associated with the host.
> + * @cmd:	AFU command to send.
> + *
> + * Return:
> + *	0 on success
> + *	-1 on failure
> + */
> +int cxlflash_send_cmd(struct afu *afu, struct afu_cmd *cmd)
> +{
> +	int nretry = 0;
> +	int rc = 0;
> +
> +	if (afu->room == 0)
> +		do {
> +			afu->room = readq_be(&afu->host_map->cmd_room);
> +			udelay(nretry);
> +		} while ((afu->room == 0) && (nretry++ < MC_ROOM_RETRY_CNT));

How does afu->room ever go to zero? I see a couple of places where you read it
from the device if it is already zero, but it seems like once you read a non-zero
value from the device you'll never read it again. 

Do you expect to get into this leg of code often? Would it be better to
return SCSI_MLQUEUE_HOST_BUSY here instead?

> +
> +	cmd->sa.host_use_b[0] = 0;	/* 0 means active */
> +	cmd->sa.ioasc = 0;
> +
> +	/* Only kick off the timer for internal commands */
> +	if (cmd->internal) {
> +		cmd->timer.expires = (jiffies +
> +					(cmd->rcb.timeout * 2 * HZ));
> +		add_timer(&cmd->timer);
> +	} else if (cmd->rcb.timeout)
> +		pr_err("%s: timer not started %d\n",
> +		       __func__, cmd->rcb.timeout);
> +
> +	/* Write IOARRIN */
> +	if (afu->room)
> +		writeq_be((u64)&cmd->rcb, &afu->host_map->ioarrin);
> +	else {
> +		pr_err("%s: no cmd_room to send 0x%X\n",
> +		       __func__, cmd->rcb.cdb[0]);
> +		rc = -1;
> +	}
> +
> +	pr_debug("%s: cmd=%p len=%d ea=%p rc=%d\n", __func__, cmd,
> +		 cmd->rcb.data_len, (void *)cmd->rcb.data_ea, rc);
> +
> +	/* Let timer fire to complete the response... */
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_wait_resp() - polls for a response or timeout to a sent AFU command
> + * @afu:	AFU associated with the host.
> + * @cmd:	AFU command that was sent.
> + */
> +void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd)
> +{
> +	while (!(cmd->sa.host_use_b[0] & B_DONE))
> +		cpu_relax();

Could you wait on the sync_wait_q here instead? 

> +
> +	del_timer(&cmd->timer);	/* already stopped if timer fired */
> +
> +	if (cmd->sa.ioasc != 0)
> +		pr_err("%s: CMD 0x%X failed, IOASC: flags 0x%X, afu_rc 0x%X, "
> +		       "scsi_rc 0x%X, fc_rc 0x%X\n", __func__, cmd->rcb.cdb[0],
> +		       cmd->sa.rc.flags, cmd->sa.rc.afu_rc, cmd->sa.rc.scsi_rc,
> +		       cmd->sa.rc.fc_rc);
> +}
> +
> +/**
> + * cxlflash_afu_sync() - builds and sends an AFU sync command
> + * @afu:	AFU associated with the host.
> + * @ctx_hndl_u:	Identifies context requesting sync.
> + * @res_hndl_u:	Identifies resource requesting sync.
> + * @mode:	Type of sync to issue (lightweight, heavyweight, global).
> + *
> + * The AFU can only take 1 sync command at a time. This routine can be
> + * called from both interrupt and process context. The caller is responsible
> + * for any serialization.
> + *
> + * Return:
> + *	0 on success
> + *	-1 on failure
> + */
> +int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
> +		      res_hndl_t res_hndl_u, u8 mode)
> +{
> +	struct cxlflash_cfg *cfg = afu->parent;
> +	struct afu_cmd *cmd;
> +	int rc = 0;
> +	int retry_cnt = 0;
> +
> +	while (cfg->sync_active) {
> +		pr_debug("%s: sync issued while one is active\n", __func__);
> +		wait_event(cfg->sync_wait_q, !cfg->sync_active);

The comment before the function indicates this function can be called from interrupt
context, yet here you are doing a wait_event (the udelay is also not nice at interrupt
level). Looking at the code, though, it seems like this function only gets called from
afu_link_reset, which only gets called from cxlflash_worker_thread, so I'm guessing
the comment is just not correct.

However, cxlflash_worker_thread calls this with the host_lock held, so if you ever
got in the while loop here, you'd go to sleep with your host lock held.  Also, cxlflash_worker_thread
calls afu_link_reset, which then calls wait_port_offline, which calls msleep, again
with the host lock held.

> +	}
> +
> +retry:
> +	cmd = cxlflash_cmd_checkout(afu);
> +	if (unlikely(!cmd)) {
> +		retry_cnt++;
> +		pr_debug("%s: could not get command on attempt %d\n",
> +			 __func__, retry_cnt);
> +		udelay(1000*retry_cnt);

The comment before the function indicates this function can be called from interrupt context.

> +		if (retry_cnt < MC_RETRY_CNT)
> +			goto retry;
> +		pr_err("%s: could not get a free command\n", __func__);
> +		rc = -1;
> +		goto out;
> +	}
> +
> +	pr_debug("%s: afu=%p cmd=%p %d\n", __func__, afu, cmd, ctx_hndl_u);
> +
> +	memset(cmd->rcb.cdb, 0, sizeof(cmd->rcb.cdb));
> +
> +	cmd->rcb.req_flags = SISL_REQ_FLAGS_AFU_CMD;
> +	cmd->rcb.port_sel = 0x0;	/* NA */
> +	cmd->rcb.lun_id = 0x0;	/* NA */
> +	cmd->rcb.data_len = 0x0;
> +	cmd->rcb.data_ea = 0x0;
> +	cmd->internal = true;
> +	cmd->sync = true;
> +	cmd->rcb.timeout = MC_AFU_SYNC_TIMEOUT;
> +
> +	cmd->rcb.cdb[0] = 0xC0;	/* AFU Sync */
> +	cmd->rcb.cdb[1] = mode;
> +
> +	cfg->sync_active = true;
> +
> +	/* The cdb is aligned, no unaligned accessors required */
> +	*((u16 *)&cmd->rcb.cdb[2]) = swab16(ctx_hndl_u);
> +	*((u32 *)&cmd->rcb.cdb[4]) = swab32(res_hndl_u);
> +
> +	rc = cxlflash_send_cmd(afu, cmd);
> +	if (!rc)
> +		cxlflash_wait_resp(afu, cmd);
> +
> +	if ((cmd->sa.ioasc != 0) || (cmd->sa.host_use_b[0] & B_ERROR)) {
> +		rc = -1;
> +		/* B_ERROR is set on timeout */

Where does this happen? Is the AFU doing this? If so, perhaps host_use_b
is not the best name for this field?

> +	}
> +
> +out:
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_afu_reset() - resets the AFU
> + * @cfg:	Internal structure associated with the host.
> + *
> + * The reset is carried out as a full teardown via term_afu() followed by a
> + * fresh init_afu().
> + *
> + * Return:
> + *	0 on success
> + *	A failure value from internal services.
> + */
> +int cxlflash_afu_reset(struct cxlflash_cfg *cfg)
> +{
> +	int rc;
> +
> +	/* The context has to be stopped before the reset and is no longer
> +	 * available afterwards, so it is brought up again right after.
> +	 */
> +	term_afu(cfg);
> +	rc = init_afu(cfg);
> +
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +}
> +
> +/**
> + * cxlflash_worker_thread() - work thread handler for the AFU
> + * @work:	Work structure contained within cxlflash associated with host.
> + *
> + * Handles link reset which cannot be performed on interrupt context due to
> + * blocking up to a few seconds. The host lock protects the link reset state
> + * (lr_state / lr_port) but is dropped for the duration of the reset itself,
> + * because the reset may sleep.
> + */
> +static void cxlflash_worker_thread(struct work_struct *work)
> +{
> +	struct cxlflash_cfg *cfg =
> +	    container_of(work, struct cxlflash_cfg, work_q);
> +	struct afu *afu = cfg->afu;
> +	int port;
> +	unsigned long lock_flags;
> +
> +	spin_lock_irqsave(cfg->host->host_lock, lock_flags);
> +
> +	if (cfg->lr_state == LINK_RESET_REQUIRED) {
> +		port = cfg->lr_port;
> +		if (port < 0) {
> +			pr_err("%s: invalid port index %d\n", __func__, port);
> +		} else {
> +			/* Sleeping with a spinlock held and interrupts
> +			 * disabled is not allowed; release the host lock
> +			 * around the blocking link reset.
> +			 */
> +			spin_unlock_irqrestore(cfg->host->host_lock,
> +					       lock_flags);
> +			afu_link_reset(afu, port,
> +				       &afu->afu_map->
> +				       global.fc_regs[port][0]);
> +			spin_lock_irqsave(cfg->host->host_lock, lock_flags);
> +		}
> +		cfg->lr_state = LINK_RESET_COMPLETE;
> +	}
> +
> +	spin_unlock_irqrestore(cfg->host->host_lock, lock_flags);
> +}
> +
> +/**
> + * cxlflash_probe() - PCI entry point to add host
> + * @pdev:	PCI device associated with the host.
> + * @dev_id:	PCI device id associated with device.
> + *
> + * Allocates the SCSI host together with the driver's per-adapter
> + * configuration, then brings up the AFU, PCI and SCSI layers in that order.
> + * Once driver data has been attached to @pdev, any failure is unwound
> + * through cxlflash_remove().
> + *
> + * Return: 0 on success / negative errno on failure
> + */
> +static int cxlflash_probe(struct pci_dev *pdev,
> +			  const struct pci_device_id *dev_id)
> +{
> +	struct Scsi_Host *host;
> +	struct cxlflash_cfg *cfg = NULL;
> +	struct device *phys_dev;
> +	struct dev_dependent_vals *ddv;
> +	int rc = 0;
> +
> +	dev_dbg(&pdev->dev, "%s: Found CXLFLASH with IRQ: %d\n",
> +		__func__, pdev->irq);
> +
> +	/* Per-device limits come from the matching PCI id table entry */
> +	ddv = (struct dev_dependent_vals *)dev_id->driver_data;
> +	driver_template.max_sectors = ddv->max_sectors;
> +
> +	host = scsi_host_alloc(&driver_template, sizeof(struct cxlflash_cfg));
> +	if (!host) {
> +		dev_err(&pdev->dev, "%s: call to scsi_host_alloc failed!\n",
> +			__func__);
> +		rc = -ENOMEM;
> +		goto out;
> +	}
> +
> +	host->max_id = CXLFLASH_MAX_NUM_TARGETS_PER_BUS;
> +	host->max_lun = CXLFLASH_MAX_NUM_LUNS_PER_TARGET;
> +	host->max_channel = NUM_FC_PORTS - 1;
> +	host->unique_id = host->host_no;
> +	host->max_cmd_len = CXLFLASH_MAX_CDB_LEN;
> +
> +	cfg = (struct cxlflash_cfg *)host->hostdata;
> +	cfg->host = host;
> +	rc = alloc_mem(cfg);
> +	if (rc) {
> +		dev_err(&pdev->dev, "%s: call to alloc_mem failed!\n",
> +			__func__);
> +		rc = -ENOMEM;
> +		/* Nothing else is set up yet, but the host allocated above
> +		 * must be released or it is leaked. cfg lives inside the
> +		 * host and must not be touched after this.
> +		 */
> +		scsi_host_put(host);
> +		goto out;
> +	}
> +
> +	cfg->init_state = INIT_STATE_NONE;
> +	cfg->dev = pdev;
> +	cfg->dev_id = (struct pci_device_id *)dev_id;
> +	cfg->tmf_active = 0;
> +	cfg->mcctx = NULL;
> +
> +	init_waitqueue_head(&cfg->tmf_wait_q);
> +	init_waitqueue_head(&cfg->eeh_wait_q);
> +	init_waitqueue_head(&cfg->sync_wait_q);
> +
> +	INIT_WORK(&cfg->work_q, cxlflash_worker_thread);
> +	cfg->lr_state = LINK_RESET_INVALID;
> +	cfg->lr_port = -1;
> +
> +	pci_set_drvdata(pdev, cfg);
> +
> +	/* Use the special service provided to look up the physical
> +	 * PCI device, since we are called on the probe of the virtual
> +	 * PCI host bus (vphb)
> +	 */
> +	phys_dev = cxl_get_phys_dev(pdev);
> +	if (!dev_is_pci(phys_dev)) {
> +		pr_err("%s: not a pci dev\n", __func__);
> +		/* Probe must fail with a negative errno */
> +		rc = -ENODEV;
> +		goto out_remove;
> +	}
> +	cfg->parent_dev = to_pci_dev(phys_dev);
> +
> +	cfg->cxl_afu = cxl_pci_to_afu(pdev);
> +	rc = init_afu(cfg);
> +	if (rc) {
> +		dev_err(&pdev->dev, "%s: call to init_afu "
> +			"failed rc=%d!\n", __func__, rc);
> +		goto out_remove;
> +	}
> +	cfg->init_state = INIT_STATE_AFU;
> +
> +	rc = init_pci(cfg);
> +	if (rc) {
> +		dev_err(&pdev->dev, "%s: call to init_pci "
> +			"failed rc=%d!\n", __func__, rc);
> +		goto out_remove;
> +	}
> +	cfg->init_state = INIT_STATE_PCI;
> +
> +	rc = init_scsi(cfg);
> +	if (rc) {
> +		dev_err(&pdev->dev, "%s: call to init_scsi "
> +			"failed rc=%d!\n", __func__, rc);
> +		goto out_remove;
> +	}
> +	cfg->init_state = INIT_STATE_SCSI;
> +
> +out:
> +	pr_debug("%s: returning rc=%d\n", __func__, rc);
> +	return rc;
> +
> +out_remove:
> +	/* init_state records how far bring-up got before the failure */
> +	cxlflash_remove(pdev);
> +	goto out;
> +}
> +
> +/*
> + * PCI device structure
> + *
> + * Registered with the PCI core by init_cxlflash() and unregistered by
> + * exit_cxlflash(). Devices listed in cxlflash_pci_table are handed to
> + * cxlflash_probe() and released through cxlflash_remove().
> + */
> +static struct pci_driver cxlflash_driver = {
> +	.name = CXLFLASH_NAME,
> +	.id_table = cxlflash_pci_table,
> +	.probe = cxlflash_probe,
> +	.remove = cxlflash_remove,
> +};
> +
> +/**
> + * init_cxlflash() - module entry point
> + *
> + * Logs the driver banner and registers the PCI driver.
> + *
> + * Return: 0 on success / non-zero on failure
> + */
> +static int __init init_cxlflash(void)
> +{
> +	int rc;
> +
> +	pr_info("%s: IBM Power CXL Flash Adapter: %s\n",
> +		__func__, CXLFLASH_DRIVER_DATE);
> +
> +	rc = pci_register_driver(&cxlflash_driver);
> +	return rc;
> +}
> +
> +/**
> + * exit_cxlflash() - module exit point
> + *
> + * Unregisters the PCI driver that init_cxlflash() registered.
> + */
> +static void __exit exit_cxlflash(void)
> +{
> +	pci_unregister_driver(&cxlflash_driver);
> +}
> +
> +module_init(init_cxlflash);
> +module_exit(exit_cxlflash);
> diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h
> new file mode 100644
> index 0000000..014ecb8
> --- /dev/null
> +++ b/drivers/scsi/cxlflash/main.h
> @@ -0,0 +1,111 @@
> +/*
> + * CXL Flash Device Driver
> + *
> + * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
> + *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
> + *
> + * Copyright (C) 2015 IBM Corporation
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _CXLFLASH_MAIN_H
> +#define _CXLFLASH_MAIN_H
> +
> +#include <linux/list.h>
> +#include <linux/types.h>
> +#include <scsi/scsi.h>
> +#include <scsi/scsi_device.h>
> +
> +typedef unsigned int useconds_t;        /* time in microseconds */
> +
> +#define CXLFLASH_NAME                      "cxlflash"
> +#define CXLFLASH_ADAPTER_NAME              "IBM POWER CXL Flash Adapter"
> +#define CXLFLASH_DRIVER_DATE              "(May 15, 2015)"
> +
> +#define PCI_DEVICE_ID_IBM_CORSA		0x04F0
> +#define CXLFLASH_SUBS_DEV_ID		0x04F0
> +
> +/* Since there is only one target, make it 0 */
> +#define CXLFLASH_TARGET                   0x0
> +#define CXLFLASH_MAX_CDB_LEN		16
> +
> +/* Really only one target per bus since the Texan is directly attached */
> +#define CXLFLASH_MAX_NUM_TARGETS_PER_BUS                     1
> +#define CXLFLASH_MAX_NUM_LUNS_PER_TARGET                     65536
> +
> +#define CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT  (120 * HZ)
> +
> +#define NUM_FC_PORTS     CXLFLASH_NUM_FC_PORTS  /* ports per AFU */
> +
> +/* FC defines */
> +#define FC_MTIP_CMDCONFIG 0x010
> +#define FC_MTIP_STATUS 0x018
> +
> +#define FC_PNAME 0x300
> +#define FC_CONFIG 0x320
> +#define FC_CONFIG2 0x328
> +#define FC_STATUS 0x330
> +#define FC_ERROR 0x380
> +#define FC_ERRCAP 0x388
> +#define FC_ERRMSK 0x390
> +#define FC_CNT_CRCERR 0x538
> +#define FC_CRC_THRESH 0x580
> +
> +#define FC_MTIP_CMDCONFIG_ONLINE    0x20ull
> +#define FC_MTIP_CMDCONFIG_OFFLINE   0x40ull
> +
> +#define FC_MTIP_STATUS_MASK         0x30ull
> +#define FC_MTIP_STATUS_ONLINE       0x20ull
> +#define FC_MTIP_STATUS_OFFLINE      0x10ull
> +
> +/* TIMEOUT and RETRY definitions */
> +
> +/* AFU command timeout values */
> +#define MC_AFU_SYNC_TIMEOUT  5	/* 5 secs */
> +
> +/* AFU command room retry limit */
> +#define MC_ROOM_RETRY_CNT    10
> +
> +/* FC CRC clear periodic timer */
> +#define MC_CRC_THRESH 100	/* threshold in 5 mins */
> +
> +#define FC_PORT_STATUS_RETRY_CNT 100	/* 100 100ms retries = 10 seconds */
> +#define FC_PORT_STATUS_RETRY_INTERVAL_US 100000	/* microseconds */
> +
> +/* VPD defines */
> +#define CXLFLASH_VPD_LEN	256
> +#define WWPN_LEN	16
> +#define WWPN_BUF_LEN	(WWPN_LEN + 1)
> +
> +/*
> + * How much master context setup has to be unwound. init_mc() and init_afu()
> + * pass one of these to term_mc() on failure; the comments below name the
> + * failing step for which each level is used.
> + */
> +enum undo_level {
> +	RELEASE_CONTEXT = 0,	/* AFU reset or IRQ allocation failed */
> +	FREE_IRQ,		/* mapping of AFU IRQ 1 failed */
> +	UNMAP_ONE,		/* mapping of AFU IRQ 2 failed */
> +	UNMAP_TWO,		/* mapping of AFU IRQ 3 failed */
> +	UNMAP_THREE,		/* starting the context failed */
> +	UNDO_START		/* failure after the context was started */
> +};
> +
> +/*
> + * Per-device values reached through the driver_data of the matching PCI id
> + * table entry; cxlflash_probe() copies max_sectors into the host template.
> + */
> +struct dev_dependent_vals {
> +	u64 max_sectors;
> +};
> +
> +/*
> + * Describes one asynchronous interrupt condition.
> + * NOTE(review): presumably status holds a SISL_ASTATUS_* bit and port the
> + * FC port it belongs to - confirm against the table that uses this struct.
> + */
> +struct asyc_intr_info {
> +	u64 status;
> +	char *desc;
> +	u8 port;
> +	u8 action;	/* combination of the flag values defined below */
> +#define CLR_FC_ERROR   0x01
> +#define LINK_RESET     0x02
> +};
> +
> +/*
> + * Externs and Prototypes
> + */
> +int cxlflash_ioctl(struct scsi_device *, int, void __user *);
> +
> +#endif /* _CXLFLASH_MAIN_H */
> diff --git a/drivers/scsi/cxlflash/sislite.h b/drivers/scsi/cxlflash/sislite.h
> new file mode 100755
> index 0000000..aeccf63
> --- /dev/null
> +++ b/drivers/scsi/cxlflash/sislite.h
> @@ -0,0 +1,465 @@
> +/*
> + * CXL Flash Device Driver
> + *
> + * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
> + *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
> + *
> + * Copyright (C) 2015 IBM Corporation
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _SISLITE_H
> +#define _SISLITE_H
> +
> +#include <linux/types.h>
> +
> +typedef u16 ctx_hndl_t;
> +typedef u32 res_hndl_t;
> +
> +#define SIZE_4K		4096
> +#define SIZE_64K	65536
> +
> +/*
> + * IOARCB: 64 bytes, min 16 byte alignment required, host native endianness
> + * except for SCSI CDB which remains big endian per SCSI standards.
> + */
> +struct sisl_ioarcb {
> +	u16 ctx_id;		/* ctx_hndl_t */
> +	u16 req_flags;
> +#define SISL_REQ_FLAGS_RES_HNDL       0x8000u	/* bit 0 (MSB) */
> +#define SISL_REQ_FLAGS_PORT_LUN_ID    0x0000u
> +
> +#define SISL_REQ_FLAGS_SUP_UNDERRUN   0x4000u	/* bit 1 */
> +
> +#define SISL_REQ_FLAGS_TIMEOUT_SECS   0x0000u	/* bits 8,9 */
> +#define SISL_REQ_FLAGS_TIMEOUT_MSECS  0x0040u
> +#define SISL_REQ_FLAGS_TIMEOUT_USECS  0x0080u
> +#define SISL_REQ_FLAGS_TIMEOUT_CYCLES 0x00C0u
> +
> +#define SISL_REQ_FLAGS_TMF_CMD        0x0004u	/* bit 13 */
> +
> +#define SISL_REQ_FLAGS_AFU_CMD        0x0002u	/* bit 14 */
> +
> +#define SISL_REQ_FLAGS_HOST_WRITE     0x0001u	/* bit 15 (LSB) */
> +#define SISL_REQ_FLAGS_HOST_READ      0x0000u
> +
> +	union {
> +		u32 res_hndl;	/* res_hndl_t */
> +		u32 port_sel;	/* this is a selection mask:
> +				 * 0x1 -> port#0 can be selected,
> +				 * 0x2 -> port#1 can be selected.
> +				 * Can be bitwise ORed.
> +				 */
> +	};
> +	u64 lun_id;
> +	u32 data_len;		/* 4K for read/write */
> +	u32 ioadl_len;
> +	union {
> +		u64 data_ea;	/* min 16 byte aligned */
> +		u64 ioadl_ea;
> +	};
> +	u8 msi;			/* LISN to send on RRQ write */
> +#define SISL_MSI_CXL_PFAULT        0	/* reserved for CXL page faults */
> +#define SISL_MSI_SYNC_ERROR        1	/* recommended for AFU sync error */
> +#define SISL_MSI_RRQ_UPDATED       2	/* recommended for IO completion */
> +#define SISL_MSI_ASYNC_ERROR       3	/* master only - for AFU async error */
> +
> +	u8 rrq;			/* 0 for a single RRQ */
> +	u16 timeout;		/* in units specified by req_flags */
> +	u32 rsvd1;
> +	u8 cdb[16];		/* must be in big endian */
> +	struct scsi_cmnd *scp;
> +};
> +
> +struct sisl_rc {
> +	u8 flags;
> +#define SISL_RC_FLAGS_SENSE_VALID         0x80u
> +#define SISL_RC_FLAGS_FCP_RSP_CODE_VALID  0x40u
> +#define SISL_RC_FLAGS_OVERRUN             0x20u
> +#define SISL_RC_FLAGS_UNDERRUN            0x10u
> +
> +	u8 afu_rc;
> +#define SISL_AFU_RC_RHT_INVALID           0x01u	/* user error */
> +#define SISL_AFU_RC_RHT_UNALIGNED         0x02u	/* should never happen */
> +#define SISL_AFU_RC_RHT_OUT_OF_BOUNDS     0x03u	/* user error */
> +#define SISL_AFU_RC_RHT_DMA_ERR           0x04u	/* see afu_extra
> +						   may retry if afu_retry is off
> +						   possible on master exit
> +						 */
> +#define SISL_AFU_RC_RHT_RW_PERM           0x05u	/* no RW perms, user error */
> +#define SISL_AFU_RC_LXT_UNALIGNED         0x12u	/* should never happen */
> +#define SISL_AFU_RC_LXT_OUT_OF_BOUNDS     0x13u	/* user error */
> +#define SISL_AFU_RC_LXT_DMA_ERR           0x14u	/* see afu_extra
> +						   may retry if afu_retry is off
> +						   possible on master exit
> +						 */
> +#define SISL_AFU_RC_LXT_RW_PERM           0x15u	/* no RW perms, user error */
> +
> +#define SISL_AFU_RC_NOT_XLATE_HOST        0x1au	/* possible if master exited */
> +
> +	/* NO_CHANNELS means the FC ports selected by dest_port in
> +	 * IOARCB or in the LXT entry are down when the AFU tried to select
> +	 * a FC port. If the port went down on an active IO, it will set
> +	 * fc_rc to =0x54(NOLOGI) or 0x57(LINKDOWN) instead.
> +	 */
> +#define SISL_AFU_RC_NO_CHANNELS           0x20u	/* see afu_extra, may retry */
> +#define SISL_AFU_RC_CAP_VIOLATION         0x21u	/* either user error or
> +						   afu reset/master restart
> +						 */
> +#define SISL_AFU_RC_OUT_OF_DATA_BUFS      0x30u	/* always retry */
> +#define SISL_AFU_RC_DATA_DMA_ERR          0x31u	/* see afu_extra
> +						   may retry if afu_retry is off
> +						 */
> +
> +	u8 scsi_rc;		/* SCSI status byte, retry as appropriate */
> +#define SISL_SCSI_RC_CHECK                0x02u
> +#define SISL_SCSI_RC_BUSY                 0x08u
> +
> +	u8 fc_rc;		/* retry */
> +	/*
> +	 * We should only see fc_rc=0x57 (LINKDOWN) or 0x54(NOLOGI) for
> +	 * commands that are in flight when a link goes down or is logged out.
> +	 * If the link is down or logged out before AFU selects the port, either
> +	 * it will choose the other port or we will get afu_rc=0x20 (no_channel)
> +	 * if there is no valid port to use.
> +	 *
> +	 * ABORTPEND/ABORTOK/ABORTFAIL/TGTABORT can be retried, typically these
> +	 * would happen if a frame is dropped and something times out.
> +	 * NOLOGI or LINKDOWN can be retried if the other port is up.
> +	 * RESIDERR can be retried as well.
> +	 *
> +	 * ABORTFAIL might indicate that lots of frames are getting CRC errors.
> +	 * So it may be retried once, and the link reset if it happens again.
> +	 * The link can also be reset on the CRC error threshold interrupt.
> +	 */
> +#define SISL_FC_RC_ABORTPEND	0x52	/* exchange timeout or abort request */
> +#define SISL_FC_RC_WRABORTPEND	0x53	/* due to write XFER_RDY invalid */
> +#define SISL_FC_RC_NOLOGI	0x54	/* port not logged in, in-flight cmds */
> +#define SISL_FC_RC_NOEXP	0x55	/* FC protocol error or HW bug */
> +#define SISL_FC_RC_INUSE	0x56	/* tag already in use, HW bug */
> +#define SISL_FC_RC_LINKDOWN	0x57	/* link down, in-flight cmds */
> +#define SISL_FC_RC_ABORTOK	0x58	/* pending abort completed w/success */
> +#define SISL_FC_RC_ABORTFAIL	0x59	/* pending abort completed w/fail */
> +#define SISL_FC_RC_RESID	0x5A	/* ioasa underrun/overrun flags set */
> +#define SISL_FC_RC_RESIDERR	0x5B	/* actual data len does not match SCSI
> +					   reported len, possibly due to dropped
> +					   frames */
> +#define SISL_FC_RC_TGTABORT	0x5C	/* command aborted by target */
> +};
> +
> +#define SISL_SENSE_DATA_LEN     20	/* Sense data length         */
> +
> +/*
> + * IOASA: 64 bytes & must follow IOARCB, min 16 byte alignment required,
> + * host native endianness
> + */
> +struct sisl_ioasa {
> +	/* rc and ioasc overlay the same bytes: ioasc reads all four return
> +	 * code bytes at once, and a value of 0 means good completion.
> +	 */
> +	union {
> +		struct sisl_rc rc;
> +		u32 ioasc;
> +#define SISL_IOASC_GOOD_COMPLETION        0x00000000u
> +	};
> +	u32 resid;
> +	u8 port;
> +	u8 afu_extra;
> +	/* when afu_rc=0x04, 0x14, 0x31 (_xxx_DMA_ERR):
> +	 * afu_extra contains PSL response code. Useful codes are:
> +	 */
> +#define SISL_AFU_DMA_ERR_PAGE_IN	0x0A	/* AFU_retry_on_pagein Action
> +						 *  Enabled            N/A
> +						 *  Disabled           retry
> +						 */
> +#define SISL_AFU_DMA_ERR_INVALID_EA	0x0B	/* this is a hard error
> +						 * afu_rc	Implies
> +						 * 0x04, 0x14	master exit.
> +						 * 0x31         user error.
> +						 */
> +	/* when afu rc=0x20 (no channels):
> +	 * afu_extra bits [4:5]: available portmask,  [6:7]: requested portmask.
> +	 */
> +#define SISL_AFU_NO_CLANNELS_AMASK(afu_extra) (((afu_extra) & 0x0C) >> 2)
> +#define SISL_AFU_NO_CLANNELS_RMASK(afu_extra) ((afu_extra) & 0x03)
> +
> +	u8 scsi_extra;
> +	u8 fc_extra;
> +	u8 sense_data[SISL_SENSE_DATA_LEN];
> +
> +	/* These fields are defined by the SISlite architecture for the
> +	 * host to use as they see fit for their implementation.
> +	 * The driver keeps its command state in host_use_b[0]: it is
> +	 * cleared to 0 (active) when a command is sent and tested for
> +	 * B_DONE / B_ERROR afterwards.
> +	 */
> +	union {
> +		u64 host_use[4];
> +		u8 host_use_b[32];
> +	};
> +};

Should this have __attribute__((packed))?

> +
> +#define SISL_RESP_HANDLE_T_BIT        0x1ull	/* Toggle bit */
> +
> +/* MMIO space is required to support only 64-bit access */
> +
> +/*
> + * This AFU has two mechanisms to deal with endian-ness.
> + * One is a global configuration (in the afu_config) register
> + * below that specifies the endian-ness of the host.
> + * The other is a per context (i.e. application) specification
> + * controlled by the endian_ctrl field here. Since the master
> + * context is one such application the master context's
> + * endian-ness is set to be the same as the host.
> + *
> + * As per the SISlite spec, the MMIO registers are always
> + * big endian.
> + */
> +#define SISL_ENDIAN_CTRL_BE           0x8000000000000080ull
> +#define SISL_ENDIAN_CTRL_LE           0x0000000000000000ull
> +
> +#ifdef __BIG_ENDIAN
> +#define SISL_ENDIAN_CTRL              SISL_ENDIAN_CTRL_BE
> +#else
> +#define SISL_ENDIAN_CTRL              SISL_ENDIAN_CTRL_LE
> +#endif
> +
> +/* per context host transport MMIO  */
> +struct sisl_host_map {
> +	__be64 endian_ctrl;     /* Per context Endian Control. The AFU will
> +			      * operate on whatever the context is of the
> +			      * host application.
> +			      */
> +
> +	__be64 intr_status;	/* this sends LISN# programmed in ctx_ctrl.
> +				 * Only recovery in a PERM_ERR is a context
> +				 * exit since there is no way to tell which
> +				 * command caused the error.
> +				 */
> +#define SISL_ISTATUS_PERM_ERR_CMDROOM    0x0010ull	/* b59, user error */
> +#define SISL_ISTATUS_PERM_ERR_RCB_READ   0x0008ull	/* b60, user error */
> +#define SISL_ISTATUS_PERM_ERR_SA_WRITE   0x0004ull	/* b61, user error */
> +#define SISL_ISTATUS_PERM_ERR_RRQ_WRITE  0x0002ull	/* b62, user error */
> +	/* Page in wait accessing RCB/IOASA/RRQ is reported in b63.
> +	 * Same error in data/LXT/RHT access is reported via IOASA.
> +	 */
> +#define SISL_ISTATUS_TEMP_ERR_PAGEIN     0x0001ull	/* b63, can be generated
> +							 * only when AFU auto
> +							 * retry is disabled.
> +							 * If user can determine
> +							 * the command that
> +							 * caused the error, it
> +							 * can be retried.
> +							 */
> +#define SISL_ISTATUS_UNMASK  (0x001Full)	/* 1 means unmasked */
> +#define SISL_ISTATUS_MASK    ~(SISL_ISTATUS_UNMASK)	/* 1 means masked */
> +
> +	__be64 intr_clear;
> +	__be64 intr_mask;
> +	__be64 ioarrin;		/* only write what cmd_room permits */
> +	__be64 rrq_start;	/* start & end are both inclusive */
> +	__be64 rrq_end;		/* write sequence: start followed by end */
> +	__be64 cmd_room;	/* read by the driver before writing ioarrin */
> +	__be64 ctx_ctrl;	/* least significant byte or b56:63 is LISN# */
> +	__be64 mbox_w;		/* restricted use */
> +};
> +
> +/* per context provisioning & control MMIO */
> +struct sisl_ctrl_map {
> +	__be64 rht_start;
> +	__be64 rht_cnt_id;
> +	/* both cnt & ctx_id args must be ull */
> +#define SISL_RHT_CNT_ID(cnt, ctx_id)  (((cnt) << 48) | ((ctx_id) << 32))
> +
> +	/* The driver reads mbox_r first to unlock writes to ctx_cap */
> +	__be64 ctx_cap;	/* afu_rc below is when the capability is violated */
> +#define SISL_CTX_CAP_PROXY_ISSUE       0x8000000000000000ull /* afu_rc 0x21 */
> +#define SISL_CTX_CAP_REAL_MODE         0x4000000000000000ull /* afu_rc 0x21 */
> +#define SISL_CTX_CAP_HOST_XLATE        0x2000000000000000ull /* afu_rc 0x1a */
> +#define SISL_CTX_CAP_PROXY_TARGET      0x1000000000000000ull /* afu_rc 0x21 */
> +#define SISL_CTX_CAP_AFU_CMD           0x0000000000000008ull /* afu_rc 0x21 */
> +#define SISL_CTX_CAP_GSCSI_CMD         0x0000000000000004ull /* afu_rc 0x21 */
> +#define SISL_CTX_CAP_WRITE_CMD         0x0000000000000002ull /* afu_rc 0x21 */
> +#define SISL_CTX_CAP_READ_CMD          0x0000000000000001ull /* afu_rc 0x21 */
> +	__be64 mbox_r;
> +};
> +
> +/* single copy global regs */
> +struct sisl_global_regs {
> +	__be64 aintr_status;
> +	/* In cxlflash, each FC port/link gets a byte of status */
> +#define SISL_ASTATUS_FC0_OTHER	 0x8000ull /* b48, other err,
> +					      FC_ERRCAP[31:20] */
> +#define SISL_ASTATUS_FC0_LOGO    0x4000ull /* b49, target sent FLOGI/PLOGI/LOGO
> +						   while logged in */
> +#define SISL_ASTATUS_FC0_CRC_T   0x2000ull /* b50, CRC threshold exceeded */
> +#define SISL_ASTATUS_FC0_LOGI_R  0x1000ull /* b51, login state machine timed out
> +						   and retrying */
> +#define SISL_ASTATUS_FC0_LOGI_F  0x0800ull /* b52, login failed,
> +					      FC_ERROR[19:0] */
> +#define SISL_ASTATUS_FC0_LOGI_S  0x0400ull /* b53, login succeeded */
> +#define SISL_ASTATUS_FC0_LINK_DN 0x0200ull /* b54, link online to offline */
> +#define SISL_ASTATUS_FC0_LINK_UP 0x0100ull /* b55, link offline to online */
> +
> +#define SISL_ASTATUS_FC1_OTHER   0x0080ull /* b56 */
> +#define SISL_ASTATUS_FC1_LOGO    0x0040ull /* b57 */
> +#define SISL_ASTATUS_FC1_CRC_T   0x0020ull /* b58 */
> +#define SISL_ASTATUS_FC1_LOGI_R  0x0010ull /* b59 */
> +#define SISL_ASTATUS_FC1_LOGI_F  0x0008ull /* b60 */
> +#define SISL_ASTATUS_FC1_LOGI_S  0x0004ull /* b61 */
> +#define SISL_ASTATUS_FC1_LINK_DN 0x0002ull /* b62 */
> +#define SISL_ASTATUS_FC1_LINK_UP 0x0001ull /* b63 */
> +
> +#define SISL_FC_INTERNAL_UNMASK	0x0000000300000000ull	/* 1 means unmasked */
> +#define SISL_FC_INTERNAL_MASK	~(SISL_FC_INTERNAL_UNMASK)
> +#define SISL_FC_INTERNAL_SHIFT	32
> +
> +#define SISL_ASTATUS_UNMASK	0xFFFFull		/* 1 means unmasked */
> +#define SISL_ASTATUS_MASK	~(SISL_ASTATUS_UNMASK)	/* 1 means masked */
> +
> +	__be64 aintr_clear;
> +	__be64 aintr_mask;
> +	__be64 afu_ctrl;
> +	__be64 afu_hb;		/* heartbeat; sampled once during global init */
> +	__be64 afu_scratch_pad;
> +	__be64 afu_port_sel;	/* global port select written at init */
> +	/* The SISL_AFUCONF_* values below apply to afu_config */
> +#define SISL_AFUCONF_AR_IOARCB	0x4000ull
> +#define SISL_AFUCONF_AR_LXT	0x2000ull
> +#define SISL_AFUCONF_AR_RHT	0x1000ull
> +#define SISL_AFUCONF_AR_DATA	0x0800ull
> +#define SISL_AFUCONF_AR_RSRC	0x0400ull
> +#define SISL_AFUCONF_AR_IOASA	0x0200ull
> +#define SISL_AFUCONF_AR_RRQ	0x0100ull
> +/* Aggregate all Auto Retry Bits */
> +#define SISL_AFUCONF_AR_ALL	(SISL_AFUCONF_AR_IOARCB|SISL_AFUCONF_AR_LXT| \
> +				 SISL_AFUCONF_AR_RHT|SISL_AFUCONF_AR_DATA|   \
> +				 SISL_AFUCONF_AR_RSRC|SISL_AFUCONF_AR_IOASA| \
> +				 SISL_AFUCONF_AR_RRQ)
> +#ifdef __BIG_ENDIAN
> +#define SISL_AFUCONF_ENDIAN            0x0000ull
> +#else
> +#define SISL_AFUCONF_ENDIAN            0x0020ull
> +#endif
> +#define SISL_AFUCONF_MBOX_CLR_READ     0x0010ull
> +	__be64 afu_config;
> +	__be64 rsvd[0xf8];
> +	__be64 afu_version;	/* read without byte swap to keep string order */
> +	__be64 interface_version;
> +};
> +
> +#define CXLFLASH_NUM_FC_PORTS   2
> +#define CXLFLASH_MAX_CONTEXT  512	/* how many contexts per afu */
> +#define CXLFLASH_NUM_VLUNS    512
> +
> +struct sisl_global_map {
> +	union {
> +		struct sisl_global_regs regs;
> +		char page0[SIZE_4K];	/* page 0 */
> +	};
> +
> +	char page1[SIZE_4K];	/* page 1 */
> +
> +	/* pages 2 & 3 */
> +	__be64 fc_regs[CXLFLASH_NUM_FC_PORTS][CXLFLASH_NUM_VLUNS];
> +
> +	/* pages 4 & 5 (lun tbl) */
> +	__be64 fc_port[CXLFLASH_NUM_FC_PORTS][CXLFLASH_NUM_VLUNS];
> +
> +};
> +
> +/*
> + * CXL Flash Memory Map
> + *
> + *	+-------------------------------+
> + *	|    512 * 64 KB User MMIO      |
> + *	|        (per context)          |
> + *	|       User Accessible         |
> + *	+-------------------------------+
> + *	|    512 * 128 B per context    |
> + *	|    Provisioning and Control   |
> + *	|   Trusted Process accessible  |
> + *	+-------------------------------+
> + *	|         64 KB Global          |
> + *	|   Trusted Process accessible  |
> + *	+-------------------------------+
> +*/
> +struct cxlflash_afu_map {
> +	union {
> +		struct sisl_host_map host;
> +		char harea[SIZE_64K];	/* 64KB each */
> +	} hosts[CXLFLASH_MAX_CONTEXT];
> +
> +	union {
> +		struct sisl_ctrl_map ctrl;
> +		char carea[cache_line_size()];	/* 128B each */
> +	} ctrls[CXLFLASH_MAX_CONTEXT];
> +
> +	union {
> +		struct sisl_global_map global;
> +		char garea[SIZE_64K];	/* 64KB single block */
> +	};
> +};
> +
> +/* LBA translation control blocks */
> +
> +struct sisl_lxt_entry {
> +	u64 rlba_base;	/* bits 0:47 is base
> +				 * b48:55 is lun index
> +				 * b58:59 is write & read perms
> +				 * (if no perm, afu_rc=0x15)
> +				 * b60:63 is port_sel mask
> +				 */
> +
> +};
> +
> +/* Per the SISlite spec, RHT entries are to be 16-byte aligned */
> +struct sisl_rht_entry {
> +	struct sisl_lxt_entry *lxt_start;
> +	u32 lxt_cnt;
> +	u16 rsvd;
> +	u8 fp;			/* format & perm nibbles.
> +				 * (if no perm, afu_rc=0x05)
> +				 */
> +	u8 nmask;
> +} __aligned(16);
> +
> +struct sisl_rht_entry_f1 {
> +	u64 lun_id;
> +	union {
> +		struct {
> +			u8 valid;
> +			u8 rsvd[5];
> +			u8 fp;
> +			u8 port_sel;
> +		};
> +
> +		u64 dw;
> +	};
> +} __aligned(16);

For structures like these that look to be shared with the hardware, what you probably want is:

__attribute__((packed, aligned (16)));


> +
> +/* make the fp byte */
> +#define SISL_RHT_FP(fmt, perm) (((fmt) << 4) | (perm))
> +
> +/* make the fp byte for a clone from a source fp and clone flags
> + * flags must be only 2 LSB bits.
> + */
> +#define SISL_RHT_FP_CLONE(src_fp, cln_flags) ((src_fp) & (0xFC | (cln_flags)))
> +
> +#define RHT_PERM_READ  0x01u
> +#define RHT_PERM_WRITE 0x02u
> +#define RHT_PERM_RW    (RHT_PERM_READ | RHT_PERM_WRITE)
> +
> +/* extract the perm bits from a fp */
> +#define SISL_RHT_PERM(fp) ((fp) & RHT_PERM_RW)
> +
> +#define PORT0  0x01u
> +#define PORT1  0x02u
> +#define BOTH_PORTS    (PORT0 | PORT1)
> +
> +/* AFU Sync Mode byte */
> +#define AFU_LW_SYNC 0x0u
> +#define AFU_HW_SYNC 0x1u
> +#define AFU_GSYNC   0x2u
> +
> +/* Special Task Management Function CDB */
> +#define TMF_LUN_RESET  0x1u
> +#define TMF_CLEAR_ACA  0x2u
> +
> +#endif /* _SISLITE_H */
>

Matthew R. Ochs May 31, 2015, 4:16 a.m. UTC | #2

Brian,

Thanks for reviewing this patch. Responses are inline below.


-matt

On May 29, 2015, at 3:54 PM, Brian King wrote:

>> +/* Check for power of 2 at compile time */
>> +#define NOT_POW2(_x) ((_x) && ((_x) & ((_x) - 1)))
>> +#if NOT_POW2(CXLFLASH_NUM_CMDS)
>> +#error "CXLFLASH_NUM_CMDS is not a power of 2!"
>> +#endif
> 
> Can you use BUILD_BUG_ON_NOT_POWER_OF_2 in include/linux/bug.h for this instead?

Was not aware of this define...sure.


>> +void cxlflash_cmd_checkin(struct afu_cmd *cmd)
>> +{
>> +	if (unlikely(atomic_inc_return(&cmd->free) != 1)) {
>> +		pr_err("%s: Freeing cmd (%d) that is not in use!\n",
>> +		       __func__, cmd->slot);
>> +		return;
>> +	}
> 
> Seems like it's possible for another thread to grab the cmd at this point and
> start using it before the re-init below occurs, such that the following
> writes could happen when you don't want them to. If you re-init the command
> before setting the free bit you should be ok.
> 
>> +
>> +	cmd->special = 0;
>> +	cmd->internal = false;
>> +	cmd->sync = false;
>> +	cmd->rcb.timeout = 0;

Good catch. We'll move this up above the free bit statement.


>> +	if (ioasa->rc.scsi_rc) {
>> +		/* We have a SCSI status */
>> +		if (ioasa->rc.flags & SISL_RC_FLAGS_SENSE_VALID)
>> +			memcpy(scp->sense_buffer, ioasa->sense_data,
>> +			       SISL_SENSE_DATA_LEN);
>> +		scp->result = ioasa->rc.scsi_rc | (DID_ERROR << 16);
> 
> If there is valid sense data here you don't want to set DID_ERROR. By setting
> DID_ERROR here, scsi_decide_disposition won't use the sense data to determine
> what EH action to perform.
> 

Ok, will replace with DRIVER_BYTE(DRIVER_SENSE)


>> +	if (ioasa->rc.fc_rc) {
>> +		/* We have an FC status */
>> +		switch (ioasa->rc.fc_rc) {
>> +		case SISL_FC_RC_RESIDERR:
>> +			/* Resid mismatch between adapter and device */
>> +		case SISL_FC_RC_TGTABORT:
>> +		case SISL_FC_RC_ABORTOK:
>> +		case SISL_FC_RC_ABORTFAIL:
>> +		case SISL_FC_RC_LINKDOWN:
>> +		case SISL_FC_RC_NOLOGI:
>> +		case SISL_FC_RC_ABORTPEND:
>> +			scp->result = (DID_IMM_RETRY << 16);
> 
> So if someone comes and pulls the cables on the card you are going to
> return DID_IMM_RETRY for all I/O sent? 

No, will return DID_REQUEUE for the cable pull case.


>> +		case SISL_FC_RC_RESID:
>> +			/* This indicates an FCP resid underrun */
>> +			if (!(ioasa->rc.flags & SISL_RC_FLAGS_OVERRUN)) {
>> +				/* If the SISL_RC_FLAGS_OVERRUN flag was set,
>> +				 * then we will handle this error else where.
>> +				 * If not then we must handle it here.
>> +				 * This is probably an AFU bug. We will
>> +				 * attempt a retry to see if that resolves it.
>> +				 */
>> +				scp->result = (DID_IMM_RETRY << 16);
> 
> DID_IMM_RETRY probably isn't what you want. This will force a retry and NOT
> decrement the retry counter, so if it is an AFU bug you'd better be sure
> there is no way this is a hard condition, otherwise you'll retry until we
> hit the timeout. Returning DID_ERROR might be better.

Ok, will replace with DID_ERROR.


>> +static void cmd_complete(struct afu_cmd *cmd)
>> +{
>> +	struct scsi_cmnd *scp;
>> +	struct afu *afu = cmd->parent;
>> +	struct cxlflash_cfg *cfg = afu->parent;
>> +
>> +	cmd->sa.host_use_b[0] |= B_DONE;
> 
> This is done with no locking, but is not an atomic operation. Are there
> any cases where two simultaneous writers of this field could result
> in losing setting of a bit? Example, they both read at the same time
> and read zero, then each writer does their store, so the last one wins.

As of v2, there is not a case of multiple writers. However, this may
change due to some modifications being made for how we handle
internal commands (anything that isn't coming from the SCSI stack)
and their timeouts. If it does change to where we do have multiple
writers we will lock where appropriate.


>> +	wait_event(cfg->tmf_wait_q, !cfg->tmf_active);
> 
> You don't seem to be doing any locking or barrier semantics around the setting
> or checking of tmf_active. Additionally, since there is no locking, and it's a bit
> field, it will take a read/modify/write to the byte it's in, potentially messing
> with the other bit fields if you have multiple concurrent bit changes going on without locking.

Ok, we'll look at revising this.


>> +	nseg = scsi_dma_map(scp);
>> +	if (unlikely(nseg < 0)) {
>> +		dev_err(&pdev->dev, "%s: Fail DMA map! nseg=%d\n",
>> +			__func__, nseg);
>> +		rc = SCSI_MLQUEUE_DEVICE_BUSY;
> 
> This should probably be SCSI_MLQUEUE_HOST_BUSY instead, since it would be
> host resources you are short on and not device resources.

Ok, will replace with SCSI_MLQUEUE_HOST_BUSY.


>> +	ncount = scsi_sg_count(scp);
>> +	scsi_for_each_sg(scp, sg, ncount, i) {
>> +		cmd->rcb.data_len = (sg_dma_len(sg));
>> +		cmd->rcb.data_ea = (sg_dma_address(sg));
> 
> What's up with the extra parenthesis?

We put those in just for you. =)

In all seriousness, I think those were just a holdover from bring-up when we were
[incorrectly] adding in offsets to these values. We'll drop the extra parentheses.


>> +static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
>> +{
>> +	int rc = SUCCESS;
>> +	struct Scsi_Host *host = scp->device->host;
>> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
>> +	struct afu *afu = cfg->afu;
>> +
>> +	pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
>> +		 "cdb=(%08X-%08X-%08X-%08X)\n", __func__, scp,
>> +		 host->host_no, scp->device->channel,
>> +		 scp->device->id, scp->device->lun,
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>> +
>> +	scp->result = (DID_OK << 16);
> 
> Don't think this should be needed. scsi eh will requeue or fail the
> command as appropriate.
> 

Ok, will remove this.


>> +static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
>> +{
>> +	int rc = SUCCESS;
>> +	int rcr = 0;
>> +	struct Scsi_Host *host = scp->device->host;
>> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
>> +
>> +	pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
>> +		 "cdb=(%08X-%08X-%08X-%08X)\n", __func__, scp,
>> +		 host->host_no, scp->device->channel,
>> +		 scp->device->id, scp->device->lun,
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>> +		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>> +
>> +	scp->result = (DID_OK << 16);
> 
> Don't think this should be needed. scsi eh will requeue or fail the
> command as appropriate.

Ditto.


>> +static ssize_t cxlflash_show_dev_mode(struct device *dev,
>> +				      struct device_attribute *attr, char *buf)
>> +{
>> +	struct scsi_device *sdev = to_scsi_device(dev);
>> +	void *lun_info = (void *)sdev->hostdata;
>> +	char *legacy = "legacy",
>> +	     *superpipe = "superpipe";
>> +
>> +	return snprintf(buf, PAGE_SIZE, "%s\n", lun_info ? superpipe : legacy);
> 
> Why bother creating these legacy and superpipe locals at all? Just do:
> 
> return snprintf(buf, PAGE_SIZE, "%s\n", lun_info ? "superpipe" : "legacy");

Sure, we can simplify this.


>> +	switch (cfg->init_state) {
>> +	case INIT_STATE_SCSI:
>> +		scsi_remove_host(cfg->host);
>> +		dev_dbg(&pdev->dev, "%s: after scsi_remove_host!\n", __func__);
>> +		scsi_host_put(cfg->host);
>> +		dev_dbg(&pdev->dev, "%s: after scsi_host_put!\n", __func__);
> 
> Would probably be good to scrub the code for some of these debug statements.
> Some are fine to leave in the code if useful, but ones like these above should
> probably go.

We'll look at scrubbing these and similar debug prints.


>> +static const struct asyc_intr_info ainfo[] = {
>> +	{SISL_ASTATUS_FC0_OTHER, "fc 0: other error", 0,
>> +		CLR_FC_ERROR | LINK_RESET},
>> +	{SISL_ASTATUS_FC0_LOGO, "fc 0: target initiated LOGO", 0, 0},
>> +	{SISL_ASTATUS_FC0_CRC_T, "fc 0: CRC threshold exceeded", 0, LINK_RESET},
>> +	{SISL_ASTATUS_FC0_LOGI_R, "fc 0: login timed out, retrying", 0, 0},
>> +	{SISL_ASTATUS_FC0_LOGI_F, "fc 0: login failed", 0, CLR_FC_ERROR},
>> +	{SISL_ASTATUS_FC0_LOGI_S, "fc 0: login succeeded", 0, 0},
>> +	{SISL_ASTATUS_FC0_LINK_DN, "fc 0: link down", 0, 0},
>> +	{SISL_ASTATUS_FC0_LINK_UP, "fc 0: link up", 0, 0},
> 
> Does "fc 0" here mean "port 0"?

Correct. Similarly fc1 refers to port 1. We'll change the output string to help clarify this.


>> +static irqreturn_t cxlflash_async_err_irq(int irq, void *data)
>> +{
>> +	struct afu *afu = (struct afu *)data;
>> +	struct cxlflash_cfg *cfg;
>> +	u64 reg_unmasked;
>> +	const struct asyc_intr_info *info;
>> +	volatile struct sisl_global_map *global = &afu->afu_map->global;
> 
> Does this need to be volatile? 

This likely does not need to be volatile as we're using the MMIO accessors. We'll revisit
the places we're using volatile in the driver to see if they're really needed.


>> +int cxlflash_send_cmd(struct afu *afu, struct afu_cmd *cmd)
>> +{
>> +	int nretry = 0;
>> +	int rc = 0;
>> +
>> +	if (afu->room == 0)
>> +		do {
>> +			afu->room = readq_be(&afu->host_map->cmd_room);
>> +			udelay(nretry);
>> +		} while ((afu->room == 0) && (nretry++ < MC_ROOM_RETRY_CNT));
> 
> How does afu->room ever go to zero? I see a couple of places where you read it
> from the device if it is already zero, but it seems like once you read a non-zero
> value from the device you'll never read it again. 
> 
> Do you expect to get into this leg of code often? Would it be better to
> return SCSI_MLQUEUE_HOST_BUSY here instead?

Good catch. Will need to update afu->room.

We don't expect to get into this leg of code very often.

SCSI_MLQUEUE_HOST_BUSY would be appropriate if afu->room is zero here. However in
the other case (context reset) we do want to try our best and wait for room to be available so
that we can proceed with the reset.


>> +void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd)
>> +{
>> +	while (!(cmd->sa.host_use_b[0] & B_DONE))
>> +		cpu_relax();
> 
> Could you wait on the sync_wait_q here instead? 

Ok, we'll look into sleeping instead of this busy-wait.


>> +int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
>> +		      res_hndl_t res_hndl_u, u8 mode)
>> +{
>> +	struct cxlflash_cfg *cfg = afu->parent;
>> +	struct afu_cmd *cmd;
>> +	int rc = 0;
>> +	int retry_cnt = 0;
>> +
>> +	while (cfg->sync_active) {
>> +		pr_debug("%s: sync issued while one is active\n", __func__);
>> +		wait_event(cfg->sync_wait_q, !cfg->sync_active);
> 
> The comment before the function indicates this function can be called from interrupt
> context, yet here you are doing a wait_event (the udelay is also not nice at interrupt
> level). Looking at the code, though, it seems like this function only gets called from
> afu_link_reset, which only gets called from cxlflash_worker_thread, so I'm guessing
> the comment is just not correct.
> 
> However, cxlflash_worker_thread calls this with the host_lock held, so if you ever
> got in the while loop here, you'd go to sleep with your host lock held.  Also, cxlflash_worker_thread
> calls afu_link_reset, which then calls wait_port_offline, which calls msleep, again
> with the host lock held.

Yep, we've already addressed this since pushing out v2. We're now serializing this routine
with a mutex and have fixed all call paths to ensure we're not called in interrupt context (or
with the host spin lock held) so we're safe to sleep/delay.
> 
>> +	}
>> +
>> +retry:
>> +	cmd = cxlflash_cmd_checkout(afu);
>> +	if (unlikely(!cmd)) {
>> +		retry_cnt++;
>> +		pr_debug("%s: could not get command on attempt %d\n",
>> +			 __func__, retry_cnt);
>> +		udelay(1000*retry_cnt);
> 
> The comment before the function indicates this function can be called from interrupt context.

With the updates mentioned above we're safe to delay here (process context only).


>> +	if ((cmd->sa.ioasc != 0) || (cmd->sa.host_use_b[0] & B_ERROR)) {
>> +		rc = -1;
>> +		/* B_ERROR is set on timeout */
> 
> Where does this happen? Is the AFU doing this? If so, perhaps host_use_b
> is not the best name for this field?

The host_use field is not set/used by the AFU. The setting of this on timeout is
missing in v2. We'll add it in v3 along with appropriate serialization to handle
multiple writers (interrupt and timeout handlers).


>> +
>> +	/* These fields are defined by the SISlite architecture for the
>> +	 * host to use as they see fit for their implementation.
>> +	 */
>> +	union {
>> +		u64 host_use[4];
>> +		u8 host_use_b[32];
>> +	};
>> +};
> 
> Should this have __attribute__(packed)?

We're likely fine without it but it won't hurt to add it, so we'll do just that.


>> +struct sisl_rht_entry_f1 {
>> +	u64 lun_id;
>> +	union {
>> +		struct {
>> +			u8 valid;
>> +			u8 rsvd[5];
>> +			u8 fp;
>> +			u8 port_sel;
>> +		};
>> +
>> +		u64 dw;
>> +	};
>> +} __aligned(16);
> 
> For structures like these that look to be shared with the hardware, what you probably want is:
> 
> __attribute__((packed, aligned (16)));

Sure, we can do this.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index b021bcb..ebb12a7 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -345,6 +345,7 @@  source "drivers/scsi/cxgbi/Kconfig"
 source "drivers/scsi/bnx2i/Kconfig"
 source "drivers/scsi/bnx2fc/Kconfig"
 source "drivers/scsi/be2iscsi/Kconfig"
+source "drivers/scsi/cxlflash/Kconfig"
 
 config SGIWD93_SCSI
 	tristate "SGI WD93C93 SCSI Driver"
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index dee160a..6a83b17 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -78,6 +78,7 @@  obj-$(CONFIG_SCSI_IN2000)	+= in2000.o
 obj-$(CONFIG_SCSI_GENERIC_NCR5380) += g_NCR5380.o
 obj-$(CONFIG_SCSI_GENERIC_NCR5380_MMIO) += g_NCR5380_mmio.o
 obj-$(CONFIG_SCSI_NCR53C406A)	+= NCR53c406a.o
+obj-$(CONFIG_CXLFLASH)		+= cxlflash/
 obj-$(CONFIG_SCSI_NCR_D700)	+= 53c700.o NCR_D700.o
 obj-$(CONFIG_SCSI_NCR_Q720)	+= NCR_Q720_mod.o
 obj-$(CONFIG_SCSI_SYM53C416)	+= sym53c416.o
diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig
new file mode 100644
index 0000000..e98c3f6
--- /dev/null
+++ b/drivers/scsi/cxlflash/Kconfig
@@ -0,0 +1,11 @@ 
+#
+# IBM CXL-attached Flash Accelerator SCSI Driver
+#
+
+config CXLFLASH
+	tristate "Support for IBM CAPI Flash"
+	depends on CXL
+	default m
+	help
+	  Allows CAPI Accelerated IO to Flash
+	  If unsure, say N.
diff --git a/drivers/scsi/cxlflash/Makefile b/drivers/scsi/cxlflash/Makefile
new file mode 100644
index 0000000..dc95e20
--- /dev/null
+++ b/drivers/scsi/cxlflash/Makefile
@@ -0,0 +1,2 @@ 
+obj-$(CONFIG_CXLFLASH) += cxlflash.o
+cxlflash-y += main.o
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
new file mode 100644
index 0000000..990a7c4
--- /dev/null
+++ b/drivers/scsi/cxlflash/common.h
@@ -0,0 +1,180 @@ 
+/*
+ * CXL Flash Device Driver
+ *
+ * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
+ *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
+ *
+ * Copyright (C) 2015 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _CXLFLASH_COMMON_H
+#define _CXLFLASH_COMMON_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+
+
+#define MAX_CONTEXT  CXLFLASH_MAX_CONTEXT       /* num contexts per afu */
+
+#define CXLFLASH_BLOCK_SIZE	4096	/* 4K blocks */
+#define CXLFLASH_MAX_XFER_SIZE	16777216	/* 16MB transfer */
+#define CXLFLASH_MAX_SECTORS	(CXLFLASH_MAX_XFER_SIZE/CXLFLASH_BLOCK_SIZE)
+
+#define NUM_RRQ_ENTRY    16     /* for master issued cmds */
+#define MAX_RHT_PER_CONTEXT (PAGE_SIZE / sizeof(struct sisl_rht_entry))
+
+/* AFU command retry limit */
+#define MC_RETRY_CNT         5	/* sufficient for SCSI check and
+				   certain AFU errors */
+
+/* Command management definitions */
+#define CXLFLASH_NUM_CMDS	(2 * CXLFLASH_MAX_CMDS)	/* Must be a pow2 for
+							   alignment and more
+							   efficient array
+							   index derivation
+							 */
+
+#define CXLFLASH_MAX_CMDS               16
+#define CXLFLASH_MAX_CMDS_PER_LUN       CXLFLASH_MAX_CMDS
+
+/* Check for power of 2 at compile time */
+#define NOT_POW2(_x) ((_x) && ((_x) & ((_x) - 1)))
+#if NOT_POW2(CXLFLASH_NUM_CMDS)
+#error "CXLFLASH_NUM_CMDS is not a power of 2!"
+#endif
+
+/* AFU defines a fixed size of 4K for command buffers (borrow 4K page define) */
+#define CMD_BUFSIZE     SIZE_4K
+
+/* flags in IOA status area for host use */
+#define B_DONE       0x01
+#define B_ERROR      0x02	/* set with B_DONE */
+#define B_TIMEOUT    0x04	/* set with B_DONE & B_ERROR */
+
+enum cxlflash_lr_state {
+	LINK_RESET_INVALID,
+	LINK_RESET_REQUIRED,
+	LINK_RESET_COMPLETE
+};
+
+enum cxlflash_init_state {
+	INIT_STATE_NONE,
+	INIT_STATE_AFU,
+	INIT_STATE_PCI,
+	INIT_STATE_SCSI
+};
+
+/*
+ * Each context has its own set of resource handles that is visible
+ * only from that context.
+ */
+
+struct cxlflash_cfg {
+	struct afu *afu;
+	struct cxl_context *mcctx;
+
+	struct pci_dev *dev;
+	struct pci_device_id *dev_id;
+	struct Scsi_Host *host;
+
+	unsigned long cxlflash_regs_pci;
+
+	wait_queue_head_t reset_wait_q;
+	wait_queue_head_t msi_wait_q;
+	wait_queue_head_t eeh_wait_q;
+
+	struct work_struct work_q;
+	enum cxlflash_init_state init_state;
+	enum cxlflash_lr_state lr_state;
+	int lr_port;
+
+	struct cxl_afu *cxl_afu;
+
+	struct pci_pool *cxlflash_cmd_pool;
+	struct pci_dev *parent_dev;
+
+	wait_queue_head_t tmf_wait_q;
+	wait_queue_head_t sync_wait_q;
+	u8 tmf_active:1;
+	u8 sync_active:1;
+};
+
+struct afu_cmd {
+	struct sisl_ioarcb rcb;	/* IOARCB (cache line aligned) */
+	struct sisl_ioasa sa;	/* IOASA must follow IOARCB */
+	spinlock_t slock;
+	struct timer_list timer;
+	char *buf;		/* per command buffer */
+	struct afu *parent;
+	int slot;
+	atomic_t free;
+	u8 special:1;
+	u8 internal:1;
+	u8 sync:1;
+
+	/* As per the SISLITE spec the IOARCB EA has to be 16-byte aligned.
+	 * However for performance reasons the IOARCB/IOASA should be
+	 * cache line aligned.
+	 */
+} __aligned(cache_line_size());
+
+struct afu {
+	/* Stuff requiring alignment go first. */
+
+	u64 rrq_entry[NUM_RRQ_ENTRY];	/* 128B RRQ */
+	/*
+	 * Command & data for AFU commands.
+	 */
+	struct afu_cmd cmd[CXLFLASH_NUM_CMDS];
+
+	/* Beware of alignment till here. Preferably introduce new
+	 * fields after this point
+	 */
+
+	/* AFU HW */
+	int afu_fd;
+	struct cxl_ioctl_start_work work;
+	volatile struct cxlflash_afu_map *afu_map;	/* entire MMIO map */
+	volatile struct sisl_host_map *host_map;	/* MC host map */
+	volatile struct sisl_ctrl_map *ctrl_map;	/* MC control map */
+
+	ctx_hndl_t ctx_hndl;	/* master's context handle */
+	u64 *hrrq_start;
+	u64 *hrrq_end;
+	volatile u64 *hrrq_curr;
+	bool toggle;
+	u64 room;
+	u64 hb;
+	u32 cmd_couts;		/* Number of command checkouts */
+	u32 internal_lun;	/* User-desired LUN mode for this AFU */
+
+	char version[8];
+	u64 interface_version;
+
+	struct cxlflash_cfg *parent; /* Pointer back to parent cxlflash_cfg */
+
+};
+
+static inline u64 lun_to_lunid(u64 lun)
+{
+	u64 lun_id;
+
+	int_to_scsilun(lun, (struct scsi_lun *)&lun_id);
+	return swab64(lun_id);
+}
+
+int cxlflash_send_cmd(struct afu *, struct afu_cmd *);
+void cxlflash_wait_resp(struct afu *, struct afu_cmd *);
+int cxlflash_afu_reset(struct cxlflash_cfg *);
+struct afu_cmd *cxlflash_cmd_checkout(struct afu *);
+void cxlflash_cmd_checkin(struct afu_cmd *);
+int cxlflash_afu_sync(struct afu *, ctx_hndl_t, res_hndl_t, u8);
+#endif /* ifndef _CXLFLASH_COMMON_H */
+
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
new file mode 100644
index 0000000..d16f8b2
--- /dev/null
+++ b/drivers/scsi/cxlflash/main.c
@@ -0,0 +1,2242 @@ 
+/*
+ * CXL Flash Device Driver
+ *
+ * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
+ *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
+ *
+ * Copyright (C) 2015 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <asm/unaligned.h>
+
+#include <misc/cxl.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_host.h>
+
+#include "main.h"
+#include "sislite.h"
+#include "common.h"
+
+MODULE_DESCRIPTION(CXLFLASH_ADAPTER_NAME);
+MODULE_AUTHOR("Manoj N. Kumar <manoj@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Matthew R. Ochs <mrochs@linux.vnet.ibm.com>");
+MODULE_LICENSE("GPL");
+
+
+/**
+ * cxlflash_cmd_checkout() - checks out an AFU command
+ * @afu:	AFU to checkout from.
+ *
+ * Commands are checked out in a round-robin fashion. Note that since
+ * the command pool is larger than the hardware queue, the majority of
+ * times we will only loop once or twice before getting a command. The
+ * buffer and CDB within the command are initialized (zeroed) prior to
+ * returning.
+ *
+ * Return: The checked out command or NULL when command pool is empty.
+ */
+struct afu_cmd *cxlflash_cmd_checkout(struct afu *afu)
+{
+	int k, dec = CXLFLASH_NUM_CMDS;
+	struct afu_cmd *cmd;
+
+	while (dec--) {
+		k = (afu->cmd_couts++ & (CXLFLASH_NUM_CMDS - 1));
+
+		cmd = &afu->cmd[k];
+
+		if (!atomic_dec_if_positive(&cmd->free)) {
+			pr_debug("%s: returning found index=%d\n",
+				 __func__, cmd->slot);
+			memset(cmd->buf, 0, CMD_BUFSIZE);
+			memset(cmd->rcb.cdb, 0, sizeof(cmd->rcb.cdb));
+			return cmd;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * cxlflash_cmd_checkin() - checks in an AFU command
+ * @cmd:	AFU command to checkin.
+ *
+ * Safe to pass commands that have already been checked in. Several
+ * internal tracking fields are reset as part of the checkin.
+ */
+void cxlflash_cmd_checkin(struct afu_cmd *cmd)
+{
+	if (unlikely(atomic_inc_return(&cmd->free) != 1)) {
+		pr_err("%s: Freeing cmd (%d) that is not in use!\n",
+		       __func__, cmd->slot);
+		return;
+	}
+
+	cmd->special = 0;
+	cmd->internal = false;
+	cmd->sync = false;
+	cmd->rcb.timeout = 0;
+
+	pr_debug("%s: releasing cmd index=%d\n", __func__, cmd->slot);
+}
+
+/**
+ * process_cmd_err() - command error handler
+ * @cmd:	AFU command that experienced the error.
+ * @scp:	SCSI command associated with the AFU command in error.
+ *
+ * Translates error bits from AFU command to SCSI command results.
+ */
+static void process_cmd_err(struct afu_cmd *cmd, struct scsi_cmnd *scp)
+{
+	struct sisl_ioarcb *ioarcb;
+	struct sisl_ioasa *ioasa;
+
+	if (unlikely(!cmd))
+		return;
+
+	ioarcb = &(cmd->rcb);
+	ioasa = &(cmd->sa);
+
+	if (ioasa->rc.flags & SISL_RC_FLAGS_UNDERRUN) {
+		pr_debug("%s: cmd underrun cmd = %p scp = %p\n",
+			 __func__, cmd, scp);
+		scp->result = (DID_ERROR << 16);
+	}
+
+	if (ioasa->rc.flags & SISL_RC_FLAGS_OVERRUN) {
+		pr_debug("%s: cmd underrun cmd = %p scp = %p\n",
+			 __func__, cmd, scp);
+		scp->result = (DID_ERROR << 16);
+	}
+
+	pr_debug("%s: cmd failed afu_rc=%d scsi_rc=%d fc_rc=%d "
+		 "afu_extra=0x%X, scsi_entra=0x%X, fc_extra=0x%X\n",
+		 __func__, ioasa->rc.afu_rc, ioasa->rc.scsi_rc,
+		 ioasa->rc.fc_rc, ioasa->afu_extra, ioasa->scsi_extra,
+		 ioasa->fc_extra);
+
+	if (ioasa->rc.scsi_rc) {
+		/* We have a SCSI status */
+		if (ioasa->rc.flags & SISL_RC_FLAGS_SENSE_VALID)
+			memcpy(scp->sense_buffer, ioasa->sense_data,
+			       SISL_SENSE_DATA_LEN);
+		scp->result = ioasa->rc.scsi_rc | (DID_ERROR << 16);
+	}
+
+	/*
+	 * We encountered an error. Set scp->result based on nature
+	 * of error.
+	 */
+	if (ioasa->rc.fc_rc) {
+		/* We have an FC status */
+		switch (ioasa->rc.fc_rc) {
+		case SISL_FC_RC_RESIDERR:
+			/* Resid mismatch between adapter and device */
+		case SISL_FC_RC_TGTABORT:
+		case SISL_FC_RC_ABORTOK:
+		case SISL_FC_RC_ABORTFAIL:
+		case SISL_FC_RC_LINKDOWN:
+		case SISL_FC_RC_NOLOGI:
+		case SISL_FC_RC_ABORTPEND:
+			scp->result = (DID_IMM_RETRY << 16);
+			break;
+		case SISL_FC_RC_RESID:
+			/* This indicates an FCP resid underrun */
+			if (!(ioasa->rc.flags & SISL_RC_FLAGS_OVERRUN)) {
+				/* If the SISL_RC_FLAGS_OVERRUN flag was set,
+				 * then we will handle this error else where.
+				 * If not then we must handle it here.
+				 * This is probably an AFU bug. We will
+				 * attempt a retry to see if that resolves it.
+				 */
+				scp->result = (DID_IMM_RETRY << 16);
+			}
+			break;
+		case SISL_FC_RC_WRABORTPEND:
+		case SISL_FC_RC_NOEXP:
+		case SISL_FC_RC_INUSE:
+			scp->result = (DID_ERROR << 16);
+			break;
+		}
+	}
+
+	if (ioasa->rc.afu_rc) {
+		/* We have an AFU error */
+		switch (ioasa->rc.afu_rc) {
+		case SISL_AFU_RC_NO_CHANNELS:
+			scp->result = (DID_MEDIUM_ERROR << 16);
+			break;
+		case SISL_AFU_RC_DATA_DMA_ERR:
+			switch (ioasa->afu_extra) {
+			case SISL_AFU_DMA_ERR_PAGE_IN:
+				/* Retry */
+				scp->result = (DID_IMM_RETRY << 16);
+				break;
+			case SISL_AFU_DMA_ERR_INVALID_EA:
+			default:
+				scp->result = (DID_ERROR << 16);
+			}
+			break;
+		case SISL_AFU_RC_OUT_OF_DATA_BUFS:
+			/* Retry */
+			scp->result = (DID_ALLOC_FAILURE << 16);
+			break;
+		default:
+			scp->result = (DID_ERROR << 16);
+		}
+	}
+}
+
+/**
+ * cmd_complete() - command completion handler
+ * @cmd:	AFU command that has completed.
+ *
+ * Prepares and submits command that has either completed or timed out to
+ * the SCSI stack. Checks AFU command back into command pool.
+ */
+static void cmd_complete(struct afu_cmd *cmd)
+{
+	struct scsi_cmnd *scp;
+	struct afu *afu = cmd->parent;
+	struct cxlflash_cfg *cfg = afu->parent;
+
+	cmd->sa.host_use_b[0] |= B_DONE;
+
+	/* already stopped if timer fired */
+	del_timer(&cmd->timer);
+
+	if (cmd->rcb.scp) {
+		scp = cmd->rcb.scp;
+		if (cmd->sa.rc.afu_rc || cmd->sa.rc.scsi_rc ||
+		    cmd->sa.rc.fc_rc)
+			process_cmd_err(cmd, scp);
+		else
+			scp->result = (DID_OK << 16);
+
+		pr_debug("%s: calling scsi_set_resid, scp=%p "
+			 "result=%X resid=%d\n", __func__,
+			 cmd->rcb.scp, scp->result, cmd->sa.resid);
+
+		scsi_set_resid(scp, cmd->sa.resid);
+		scsi_dma_unmap(scp);
+		scp->scsi_done(scp);
+		cmd->rcb.scp = NULL;
+		if (cmd->special) {
+			cfg->tmf_active = false;
+			wake_up_all(&cfg->tmf_wait_q);
+		}
+	}
+	if (cmd->sync) {
+		cfg->sync_active = false;
+		wake_up_all(&cfg->sync_wait_q);
+	}
+
+	/* Done with command */
+	cxlflash_cmd_checkin(cmd);
+}
+
+/**
+ * send_tmf() - sends a Task Management Function (TMF)
+ * @afu:	AFU to checkout from.
+ * @scp:	SCSI command from stack.
+ * @tmfcmd:	TMF command to send.
+ *
+ * Waits for any TMF already in flight, then builds and sends a new one and
+ * blocks until its completion clears the TMF-active flag. If the command
+ * cannot be sent, the TMF-active flag is cleared and the AFU command is
+ * returned to the pool so that later senders are not blocked forever.
+ *
+ * Return:
+ *	0 on success
+ *	SCSI_MLQUEUE_HOST_BUSY when no AFU command is available
+ *	The return code of cxlflash_send_cmd() when the send fails
+ */
+static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
+{
+	struct afu_cmd *cmd;
+
+	u32 port_sel = scp->device->channel + 1;
+	struct Scsi_Host *host = scp->device->host;
+	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
+	int rc = 0;
+
+	/* Only one TMF may be outstanding at a time */
+	wait_event(cfg->tmf_wait_q, !cfg->tmf_active);
+
+	cmd = cxlflash_cmd_checkout(afu);
+	if (unlikely(!cmd)) {
+		pr_err("%s: could not get a free command\n", __func__);
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	}
+
+	cmd->rcb.ctx_id = afu->ctx_hndl;
+	cmd->rcb.port_sel = port_sel;
+	cmd->rcb.lun_id = lun_to_lunid(scp->device->lun);
+
+	cmd->rcb.req_flags = (SISL_REQ_FLAGS_PORT_LUN_ID |
+				SISL_REQ_FLAGS_SUP_UNDERRUN |
+				SISL_REQ_FLAGS_TMF_CMD);
+
+	/* Stash the scp in the reserved field, for reuse during interrupt */
+	cmd->rcb.scp = scp;
+	cmd->special = 0x1;
+	cfg->tmf_active = true;
+
+	cmd->sa.host_use_b[1] = 0;	/* reset retry cnt */
+
+	/* Copy the CDB from the cmd passed in */
+	memcpy(cmd->rcb.cdb, &tmfcmd, sizeof(tmfcmd));
+
+	/* Send the command */
+	rc = cxlflash_send_cmd(afu, cmd);
+	if (unlikely(rc)) {
+		/*
+		 * No completion will arrive for this command, so undo the
+		 * TMF bookkeeping here; otherwise tmf_active stays set and
+		 * every later waiter hangs.
+		 * NOTE(review): assumes cxlflash_send_cmd() leaves ownership
+		 * of the command with the caller on failure - confirm.
+		 */
+		cmd->rcb.scp = NULL;
+		cmd->special = 0;
+		cxlflash_cmd_checkin(cmd);
+		cfg->tmf_active = false;
+		wake_up_all(&cfg->tmf_wait_q);
+		goto out;
+	}
+
+	/* cmd_complete() clears tmf_active once the TMF has finished */
+	wait_event(cfg->tmf_wait_q, !cfg->tmf_active);
+out:
+	return rc;
+
+}
+
+/**
+ * cxlflash_driver_info() - reports the adapter name for this host driver
+ * @host:	SCSI host associated with device (not consulted).
+ *
+ * Return: The constant adapter name string, CXLFLASH_ADAPTER_NAME.
+ */
+static const char *cxlflash_driver_info(struct Scsi_Host *host)
+{
+	const char *adapter_name = CXLFLASH_ADAPTER_NAME;
+
+	return adapter_name;
+}
+
+/**
+ * cxlflash_queuecommand() - sends a mid-layer request
+ * @host:	SCSI host associated with device.
+ * @scp:	SCSI command to send.
+ *
+ * Checks out an AFU command, fills in its request control block from @scp,
+ * maps the data buffer for DMA and sends it to the AFU. This routine does
+ * not sleep: while a TMF is outstanding the request is pushed back to the
+ * mid-layer. On any failure after checkout, the AFU command (and the DMA
+ * mapping, when established) is released before returning.
+ *
+ * Return:
+ *	0 on success
+ *	SCSI_MLQUEUE_DEVICE_BUSY when device is busy
+ *	SCSI_MLQUEUE_HOST_BUSY when host is busy
+ *	The return code of cxlflash_send_cmd() when the send fails
+ */
+static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
+{
+	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
+	struct afu *afu = cfg->afu;
+	struct pci_dev *pdev = cfg->dev;
+	struct afu_cmd *cmd;
+	u32 port_sel = scp->device->channel + 1;
+	int nseg, i, ncount;
+	struct scatterlist *sg;
+	short lflag = 0;
+	int rc = 0;
+
+	pr_debug("%s: (scp=%p) %d/%d/%d/%llu cdb=(%08X-%08X-%08X-%08X)\n",
+		 __func__, scp, host->host_no, scp->device->channel,
+		 scp->device->id, scp->device->lun,
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
+
+	/*
+	 * queuecommand may be invoked from atomic context, so sleeping on
+	 * the TMF wait queue is not allowed here. Ask the mid-layer to
+	 * retry later instead.
+	 */
+	if (cfg->tmf_active) {
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	}
+
+	cmd = cxlflash_cmd_checkout(afu);
+	if (unlikely(!cmd)) {
+		pr_err("%s: could not get a free command\n", __func__);
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	}
+
+	cmd->rcb.ctx_id = afu->ctx_hndl;
+	cmd->rcb.port_sel = port_sel;
+	cmd->rcb.lun_id = lun_to_lunid(scp->device->lun);
+
+	if (scp->sc_data_direction == DMA_TO_DEVICE)
+		lflag = SISL_REQ_FLAGS_HOST_WRITE;
+	else
+		lflag = SISL_REQ_FLAGS_HOST_READ;
+
+	cmd->rcb.req_flags = (SISL_REQ_FLAGS_PORT_LUN_ID |
+				SISL_REQ_FLAGS_SUP_UNDERRUN | lflag);
+
+	/* Stash the scp in the reserved field, for reuse during interrupt */
+	cmd->rcb.scp = scp;
+
+	cmd->sa.host_use_b[1] = 0;	/* reset retry cnt */
+
+	nseg = scsi_dma_map(scp);
+	if (unlikely(nseg < 0)) {
+		dev_err(&pdev->dev, "%s: Fail DMA map! nseg=%d\n",
+			__func__, nseg);
+		rc = SCSI_MLQUEUE_DEVICE_BUSY;
+		goto out_checkin;
+	}
+
+	/* The template advertises SG_NONE, so the last entry wins */
+	ncount = scsi_sg_count(scp);
+	scsi_for_each_sg(scp, sg, ncount, i) {
+		cmd->rcb.data_len = (sg_dma_len(sg));
+		cmd->rcb.data_ea = (sg_dma_address(sg));
+	}
+
+	/* Copy the CDB from the scsi_cmnd passed in */
+	memcpy(cmd->rcb.cdb, scp->cmnd, sizeof(cmd->rcb.cdb));
+
+	/* Send the command */
+	rc = cxlflash_send_cmd(afu, cmd);
+	if (likely(!rc))
+		goto out;
+
+	/*
+	 * The command never reached the AFU, so no completion will unmap it.
+	 * NOTE(review): assumes cxlflash_send_cmd() leaves ownership of the
+	 * command with the caller on failure - confirm.
+	 */
+	scsi_dma_unmap(scp);
+
+out_checkin:
+	/* Do not leak the AFU command or leave a stale scp behind */
+	cmd->rcb.scp = NULL;
+	cxlflash_cmd_checkin(cmd);
+out:
+	return rc;
+}
+
+/**
+ * cxlflash_eh_device_reset_handler() - reset a single LUN
+ * @scp:	SCSI command to send.
+ *
+ * Issues a LUN reset TMF for the device that @scp addresses and reports
+ * failure to the error handler when the TMF could not be sent.
+ *
+ * Return:
+ *	SUCCESS as defined in scsi/scsi.h
+ *	FAILED as defined in scsi/scsi.h
+ */
+static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
+{
+	int rc = SUCCESS;
+	int rcr;
+	struct Scsi_Host *host = scp->device->host;
+	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
+	struct afu *afu = cfg->afu;
+
+	pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
+		 "cdb=(%08X-%08X-%08X-%08X)\n", __func__, scp,
+		 host->host_no, scp->device->channel,
+		 scp->device->id, scp->device->lun,
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
+
+	scp->result = (DID_OK << 16);
+
+	/* A TMF that was never sent must not be reported as a good reset */
+	rcr = send_tmf(afu, scp, TMF_LUN_RESET);
+	if (unlikely(rcr))
+		rc = FAILED;
+
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * cxlflash_eh_host_reset_handler() - reset the host adapter
+ * @scp:	SCSI command from stack identifying host.
+ *
+ * Resets the AFU of the host that owns @scp and maps the outcome onto the
+ * error handler's status codes.
+ *
+ * Return:
+ *	SUCCESS as defined in scsi/scsi.h
+ *	FAILED as defined in scsi/scsi.h
+ */
+static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
+{
+	struct Scsi_Host *host = scp->device->host;
+	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
+	int rc;
+
+	pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
+		 "cdb=(%08X-%08X-%08X-%08X)\n", __func__, scp,
+		 host->host_no, scp->device->channel,
+		 scp->device->id, scp->device->lun,
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[0]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[1]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
+		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
+
+	scp->result = (DID_OK << 16);
+
+	/* Any non-zero reset status is reported as FAILED */
+	rc = cxlflash_afu_reset(cfg) ? FAILED : SUCCESS;
+
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * cxlflash_change_queue_depth() - change the queue depth for the device
+ * @sdev:	SCSI device destined for queue depth change.
+ * @qdepth:	Requested queue depth value to set.
+ *
+ * Requests above CXLFLASH_MAX_CMDS_PER_LUN are clamped to that limit before
+ * being applied.
+ *
+ * Return: The queue depth of @sdev after the change.
+ */
+static int cxlflash_change_queue_depth(struct scsi_device *sdev, int qdepth)
+{
+	int depth = (qdepth > CXLFLASH_MAX_CMDS_PER_LUN) ?
+			CXLFLASH_MAX_CMDS_PER_LUN : qdepth;
+
+	scsi_change_queue_depth(sdev, depth);
+
+	return sdev->queue_depth;
+}
+
+/**
+ * cxlflash_show_port_status() - queries and presents the current port status
+ * @dev:	Generic device associated with the host owning the port.
+ * @attr:	Device attribute representing the port.
+ * @buf:	Buffer of length PAGE_SIZE to report back port status in ASCII.
+ *
+ * The port number is parsed from the attribute name, skipping its first
+ * four characters (the attributes are named "port0" and "port1"). The
+ * status is read from that port's FC_MTIP_STATUS register.
+ *
+ * Return: The size of the ASCII string returned in @buf, or 0 when the
+ * port number cannot be parsed or is out of range.
+ */
+static ssize_t cxlflash_show_port_status(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)shost->hostdata;
+	struct afu *afu = cfg->afu;
+
+	char *disp_status;
+	int rc;
+	u32 port;
+	u64 status;
+	volatile u64 *fc_regs;
+
+	rc = kstrtouint((attr->attr.name + 4), 10, &port);
+	/* Valid indices are 0 .. NUM_FC_PORTS - 1 */
+	if (rc || (port >= NUM_FC_PORTS))
+		return 0;
+
+	fc_regs = &afu->afu_map->global.fc_regs[port][0];
+	status =
+	    (readq_be(&fc_regs[FC_MTIP_STATUS / 8]) & FC_MTIP_STATUS_MASK);
+
+	if (status == FC_MTIP_STATUS_ONLINE)
+		disp_status = "online";
+	else if (status == FC_MTIP_STATUS_OFFLINE)
+		disp_status = "offline";
+	else
+		disp_status = "unknown";
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", disp_status);
+}
+
+/**
+ * cxlflash_show_lun_mode() - presents the current LUN mode of the host
+ * @dev:	Generic device associated with the host.
+ * @attr:	Device attribute representing the lun mode.
+ * @buf:	Buffer of length PAGE_SIZE to report back the LUN mode in ASCII.
+ *
+ * Prints the AFU's internal_lun setting as an unsigned decimal number.
+ *
+ * Return: The size of the ASCII string returned in @buf.
+ */
+static ssize_t cxlflash_show_lun_mode(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)shost->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", cfg->afu->internal_lun);
+}
+
+/**
+ * cxlflash_store_lun_mode() - sets the LUN mode of the host
+ * @dev:	Generic device associated with the host.
+ * @attr:	Device attribute representing the lun mode.
+ * @buf:	Buffer of length PAGE_SIZE containing the LUN mode in ASCII.
+ * @count:	Length of data residing in @buf.
+ *
+ * The CXL Flash AFU supports a dummy LUN mode where the external
+ * links and storage are not required. Space on the FPGA is used
+ * to create 1 or 2 small LUNs which are presented to the system
+ * as if they were a normal storage device. This feature is useful
+ * during development and also provides manufacturing with a way
+ * to test the AFU without an actual device.
+ *
+ * 0 = external LUN[s] (default)
+ * 1 = internal LUN (1 x 64K, 512B blocks, id 0)
+ * 2 = internal LUN (1 x 64K, 4K blocks, id 0)
+ * 3 = internal LUN (2 x 32K, 512B blocks, ids 0,1)
+ * 4 = internal LUN (2 x 32K, 4K blocks, ids 0,1)
+ *
+ * A value that does not parse, is out of range, or equals the current mode
+ * is silently ignored. A new mode triggers an AFU reset followed by a host
+ * scan.
+ *
+ * Return: Always @count, i.e. the input is reported as fully consumed.
+ */
+static ssize_t cxlflash_store_lun_mode(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)shost->hostdata;
+	struct afu *afu = cfg->afu;
+	u32 lun_mode;
+
+	/* Nothing to do for unparsable, unsupported or unchanged modes */
+	if (kstrtouint(buf, 10, &lun_mode) || (lun_mode >= 5) ||
+	    (lun_mode == afu->internal_lun))
+		return count;
+
+	afu->internal_lun = lun_mode;
+	cxlflash_afu_reset(cfg);
+	scsi_scan_host(cfg->host);
+
+	return count;
+}
+
+/**
+ * cxlflash_show_dev_mode() - presents the current mode of the device
+ * @dev:	Generic device associated with the device.
+ * @attr:	Device attribute representing the device mode.
+ * @buf:	Buffer of length PAGE_SIZE to report back the dev mode in ASCII.
+ *
+ * Reports "superpipe" when the SCSI device has hostdata attached and
+ * "legacy" otherwise.
+ *
+ * Return: The size of the ASCII string returned in @buf.
+ */
+static ssize_t cxlflash_show_dev_mode(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	const char *mode;
+
+	if (sdev->hostdata)
+		mode = "superpipe";
+	else
+		mode = "legacy";
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", mode);
+}
+
+/**
+ * cxlflash_wait_for_pci_err_recovery() - wait for error recovery during probe
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Returns immediately when the PCI channel is online. Otherwise waits on
+ * the EEH wait queue until the channel is back online or
+ * CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT elapses.
+ */
+static void cxlflash_wait_for_pci_err_recovery(struct cxlflash_cfg *cfg)
+{
+	struct pci_dev *pdev = cfg->dev;
+
+	if (!pci_channel_offline(pdev))
+		return;
+
+	wait_event_timeout(cfg->eeh_wait_q,
+			   !pci_channel_offline(pdev),
+			   CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT);
+}
+
+/*
+ * Host attributes
+ *
+ * port0 and port1 are read-only and share cxlflash_show_port_status(),
+ * which derives the port number from the attribute name. lun_mode is
+ * world-readable and writable by the owner. The array is NULL-terminated.
+ */
+static DEVICE_ATTR(port0, S_IRUGO, cxlflash_show_port_status, NULL);
+static DEVICE_ATTR(port1, S_IRUGO, cxlflash_show_port_status, NULL);
+static DEVICE_ATTR(lun_mode, S_IRUGO | S_IWUSR, cxlflash_show_lun_mode,
+		   cxlflash_store_lun_mode);
+
+static struct device_attribute *cxlflash_host_attrs[] = {
+	&dev_attr_port0,
+	&dev_attr_port1,
+	&dev_attr_lun_mode,
+	NULL
+};
+
+/*
+ * Device attributes
+ *
+ * mode is read-only and served by cxlflash_show_dev_mode(). The array is
+ * NULL-terminated.
+ */
+static DEVICE_ATTR(mode, S_IRUGO, cxlflash_show_dev_mode, NULL);
+
+static struct device_attribute *cxlflash_dev_attrs[] = {
+	&dev_attr_mode,
+	NULL
+};
+
+/*
+ * Host template
+ *
+ * Binds this file's queuecommand, error-handler, queue-depth and sysfs
+ * attribute entry points to the SCSI host, together with the host's
+ * queueing and transfer limits.
+ */
+static struct scsi_host_template driver_template = {
+	.module = THIS_MODULE,
+	.name = CXLFLASH_ADAPTER_NAME,
+	.info = cxlflash_driver_info,
+	.proc_name = CXLFLASH_NAME,
+	.queuecommand = cxlflash_queuecommand,
+	.eh_device_reset_handler = cxlflash_eh_device_reset_handler,
+	.eh_host_reset_handler = cxlflash_eh_host_reset_handler,
+	.change_queue_depth = cxlflash_change_queue_depth,
+	.cmd_per_lun = 16,
+	.can_queue = CXLFLASH_MAX_CMDS,
+	.this_id = -1,
+	.sg_tablesize = SG_NONE,	/* No scatter gather support. */
+	.max_sectors = CXLFLASH_MAX_SECTORS,
+	.use_clustering = ENABLE_CLUSTERING,
+	.shost_attrs = cxlflash_host_attrs,
+	.sdev_attrs = cxlflash_dev_attrs,
+};
+
+/*
+ * Device dependent values
+ *
+ * Referenced from the driver_data field of the PCI binding table entry for
+ * the Corsa device.
+ */
+static struct dev_dependent_vals dev_corsa_vals = { CXLFLASH_MAX_SECTORS };
+
+/*
+ * PCI device binding table
+ *
+ * Matches the IBM Corsa device with any subsystem IDs and carries a
+ * pointer to dev_corsa_vals as driver data. Terminated by an empty entry.
+ */
+static struct pci_device_id cxlflash_pci_table[] = {
+	{PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CORSA,
+	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_corsa_vals},
+	{}
+};
+
+MODULE_DEVICE_TABLE(pci, cxlflash_pci_table);
+
+/**
+ * free_mem() - free memory associated with the AFU
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Releases the pages backing the command buffers and then the AFU
+ * structure itself, leaving @cfg without an AFU. Does nothing when no AFU
+ * is allocated. Command timers are not touched here.
+ */
+static void free_mem(struct cxlflash_cfg *cfg)
+{
+	struct afu *afu = cfg->afu;
+	int i;
+
+	if (!afu)
+		return;
+
+	for (i = 0; i < CXLFLASH_NUM_CMDS; i++) {
+		char *buf = afu->cmd[i].buf;
+
+		/* Only a page-aligned buffer marks the start of a page */
+		if (((u64)buf & (PAGE_SIZE - 1)) == 0)
+			free_page((unsigned long)buf);
+	}
+
+	free_pages((unsigned long)afu, get_order(sizeof(struct afu)));
+	cfg->afu = NULL;
+}
+
+/**
+ * stop_afu() - stops the AFU command timers and unmaps the MMIO space
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Safe to call with AFU in a partially allocated/initialized state: a
+ * missing AFU, timers without a handler and an unmapped MMIO region are
+ * all tolerated.
+ */
+static void stop_afu(struct cxlflash_cfg *cfg)
+{
+	struct afu *afu = cfg->afu;
+	int i;
+
+	if (!afu) {
+		pr_debug("%s: returning because afu is NULL\n", __func__);
+		return;
+	}
+
+	/* Timers must be quiesced before the MMIO mapping goes away */
+	for (i = 0; i < CXLFLASH_NUM_CMDS; i++) {
+		if (!afu->cmd[i].timer.function)
+			continue;
+
+		del_timer_sync(&afu->cmd[i].timer);
+	}
+
+	if (!afu->afu_map)
+		return;
+
+	cxl_psa_unmap((void *)afu->afu_map);
+	afu->afu_map = NULL;
+}
+
+/**
+ * term_mc() - terminates the master context
+ * @cfg:	Internal structure associated with the host.
+ * @level:	Depth of allocation, where to begin waterfall tear down.
+ *
+ * Each case deliberately falls through to the next so that everything set
+ * up before @level is undone in reverse order. Logs an error and returns
+ * when the AFU or the master context is missing.
+ */
+static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level)
+{
+	struct afu *afu = cfg->afu;
+	int rc;
+
+	if (!afu || !cfg->mcctx) {
+		pr_err("%s: returning from term_mc with NULL afu or MC\n",
+		       __func__);
+		return;
+	}
+
+	switch (level) {
+	case UNDO_START:
+		rc = cxl_stop_context(cfg->mcctx);
+		BUG_ON(rc);
+		/* fall through */
+	case UNMAP_THREE:
+		pr_debug("%s: before unmap 3\n", __func__);
+		cxl_unmap_afu_irq(cfg->mcctx, 3, afu);
+		/* fall through */
+	case UNMAP_TWO:
+		pr_debug("%s: before unmap 2\n", __func__);
+		cxl_unmap_afu_irq(cfg->mcctx, 2, afu);
+		/* fall through */
+	case UNMAP_ONE:
+		pr_debug("%s: before unmap 1\n", __func__);
+		cxl_unmap_afu_irq(cfg->mcctx, 1, afu);
+		/* fall through */
+	case FREE_IRQ:
+		pr_debug("%s: before cxl_free_afu_irqs\n", __func__);
+		cxl_free_afu_irqs(cfg->mcctx);
+		/* fall through */
+	case RELEASE_CONTEXT:
+		cfg->mcctx = NULL;
+	}
+}
+
+/**
+ * term_afu() - terminates the AFU
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Tears down the master context from the top of the waterfall and then
+ * stops the AFU. Safe to call with AFU/MC in partially
+ * allocated/initialized state.
+ */
+static void term_afu(struct cxlflash_cfg *cfg)
+{
+	term_mc(cfg, UNDO_START);
+
+	/* stop_afu() quiesces the timers before unmapping the MMIO space */
+	if (cfg->afu != NULL)
+		stop_afu(cfg);
+
+	pr_debug("%s: returning\n", __func__);
+}
+
+/**
+ * cxlflash_remove() - PCI entry point to tear down host
+ * @pdev:	PCI device associated with the host.
+ *
+ * Waits for any outstanding TMF, then unwinds initialization starting from
+ * the recorded init state; each case falls through to the next. The SCSI
+ * host reference is dropped last because the configuration structure lives
+ * in the host's hostdata and is still needed by the later tear down steps.
+ *
+ * Safe to use as a cleanup in partially allocated/initialized state.
+ */
+static void cxlflash_remove(struct pci_dev *pdev)
+{
+	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
+	struct Scsi_Host *host = NULL;
+
+	dev_dbg(&pdev->dev, "%s: enter cxlflash_remove!\n", __func__);
+
+	wait_event(cfg->tmf_wait_q, !cfg->tmf_active);
+
+	switch (cfg->init_state) {
+	case INIT_STATE_SCSI:
+		scsi_remove_host(cfg->host);
+		dev_dbg(&pdev->dev, "%s: after scsi_remove_host!\n", __func__);
+		/*
+		 * Remember the host but do not put it yet: dropping the
+		 * reference here could free cfg while it is still in use
+		 * below.
+		 */
+		host = cfg->host;
+		/* Fall through */
+	case INIT_STATE_PCI:
+		pci_release_regions(cfg->dev);
+		pci_disable_device(pdev);
+		/* Fall through */
+	case INIT_STATE_AFU:
+		term_afu(cfg);
+		dev_dbg(&pdev->dev, "%s: after struct term_afu!\n",
+			__func__);
+		/* Fall through */
+	case INIT_STATE_NONE:
+		flush_work(&cfg->work_q);
+		free_mem(cfg);
+		break;
+	}
+
+	/* cfg must not be touched after this point */
+	if (host) {
+		scsi_host_put(host);
+		dev_dbg(&pdev->dev, "%s: after scsi_host_put!\n", __func__);
+	}
+
+	pr_debug("%s: returning\n", __func__);
+}
+
+/**
+ * alloc_mem() - allocates the AFU and its command pool
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Allocates zeroed pages for the AFU structure, then hands out command
+ * buffers of CMD_BUFSIZE bytes carved from zeroed pages, requesting a new
+ * page each time the running buffer pointer reaches a page boundary. When
+ * a buffer page cannot be obtained, everything allocated so far is freed
+ * via free_mem().
+ *
+ * Return:
+ *	0 on success
+ *	-ENOMEM on failure to allocate memory
+ */
+static int alloc_mem(struct cxlflash_cfg *cfg)
+{
+	struct afu *afu;
+	char *buf = NULL;
+	int i;
+
+	/* Original author's estimate: about 12K, i.e. only 1 64k page
+	 * and upto 4 4k pages
+	 */
+	afu = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+				       get_order(sizeof(struct afu)));
+	cfg->afu = afu;
+	if (unlikely(!afu)) {
+		pr_err("%s: cannot get %d free pages\n",
+		       __func__, get_order(sizeof(struct afu)));
+		return -ENOMEM;
+	}
+
+	afu->parent = cfg;
+	afu->afu_map = NULL;
+
+	for (i = 0; i < CXLFLASH_NUM_CMDS; buf += CMD_BUFSIZE, i++) {
+		/* A page-aligned (or initial NULL) pointer needs a new page */
+		if (((u64)buf & (PAGE_SIZE - 1)) == 0) {
+			buf = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+			if (unlikely(!buf)) {
+				pr_err("%s: Allocate command buffers fail!\n",
+				       __func__);
+				free_mem(cfg);
+				return -ENOMEM;
+			}
+		}
+
+		afu->cmd[i].buf = buf;
+		atomic_set(&afu->cmd[i].free, 1);
+		afu->cmd[i].slot = i;
+		afu->cmd[i].special = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * init_pci() - initializes the host as a PCI device
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Requests the PCI regions, enables the device (retrying once after
+ * waiting for error recovery when the channel is offline), sets a 64-bit
+ * DMA mask with a 32-bit fallback, enables bus mastering and saves the PCI
+ * state. On failure the steps already taken are undone through the labels
+ * at the end of the function, which then jump back to the common exit.
+ *
+ * Return:
+ *	0 on success
+ *	-EIO on unable to communicate with device
+ *	A return code from the PCI sub-routines
+ */
+static int init_pci(struct cxlflash_cfg *cfg)
+{
+	struct pci_dev *pdev = cfg->dev;
+	int rc = 0;
+
+	cfg->cxlflash_regs_pci = pci_resource_start(pdev, 0);
+	rc = pci_request_regions(pdev, CXLFLASH_NAME);
+	if (rc < 0) {
+		dev_err(&pdev->dev,
+			"%s: Couldn't register memory range of registers\n",
+			__func__);
+		goto out;
+	}
+
+	rc = pci_enable_device(pdev);
+	if (rc || pci_channel_offline(pdev)) {
+		if (pci_channel_offline(pdev)) {
+			cxlflash_wait_for_pci_err_recovery(cfg);
+			rc = pci_enable_device(pdev);
+		}
+
+		if (rc) {
+			dev_err(&pdev->dev, "%s: Cannot enable adapter\n",
+				__func__);
+			cxlflash_wait_for_pci_err_recovery(cfg);
+			goto out_release_regions;
+		}
+	}
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc < 0) {
+		dev_dbg(&pdev->dev, "%s: Failed to set 64 bit PCI DMA mask\n",
+			__func__);
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	}
+
+	if (rc < 0) {
+		dev_err(&pdev->dev, "%s: Failed to set PCI DMA mask\n",
+			__func__);
+		goto out_disable;
+	}
+
+	pci_set_master(pdev);
+
+	if (pci_channel_offline(pdev)) {
+		cxlflash_wait_for_pci_err_recovery(cfg);
+		if (pci_channel_offline(pdev)) {
+			rc = -EIO;
+			goto out_msi_disable;
+		}
+	}
+
+	rc = pci_save_state(pdev);
+
+	if (rc != PCIBIOS_SUCCESSFUL) {
+		dev_err(&pdev->dev, "%s: Failed to save PCI config space\n",
+			__func__);
+		rc = -EIO;
+		goto cleanup_nolog;
+	}
+
+out:
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+
+cleanup_nolog:
+out_msi_disable:
+	cxlflash_wait_for_pci_err_recovery(cfg);
+out_disable:
+	pci_disable_device(pdev);
+out_release_regions:
+	pci_release_regions(pdev);
+	goto out;
+
+}
+
+/**
+ * init_scsi() - adds the host to the SCSI stack and kicks off host scan
+ * @cfg:	Internal structure associated with the host.
+ *
+ * The host scan is only started when the host was added successfully.
+ *
+ * Return:
+ *	0 on success
+ *	A return code from adding the host
+ */
+static int init_scsi(struct cxlflash_cfg *cfg)
+{
+	struct pci_dev *pdev = cfg->dev;
+	int rc;
+
+	dev_dbg(&pdev->dev, "%s: before scsi_add_host\n", __func__);
+	rc = scsi_add_host(cfg->host, &pdev->dev);
+	if (rc) {
+		dev_err(&pdev->dev, "%s: scsi_add_host failed (rc=%d)\n",
+			__func__, rc);
+	} else {
+		dev_dbg(&pdev->dev, "%s: before scsi_scan_host\n", __func__);
+		scsi_scan_host(cfg->host);
+	}
+
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * set_port_online() - transitions the specified host FC port to online state
+ * @fc_regs:	Top of MMIO region defined for specified port.
+ *
+ * The provided MMIO region must be mapped prior to call. Online state means
+ * that the FC link layer has synced, completed the handshaking process, and
+ * is ready for login to start.
+ *
+ * Performs a read-modify-write of the FC_MTIP_CMDCONFIG register.
+ */
+static void set_port_online(volatile u64 *fc_regs)
+{
+	volatile u64 *cmdcfg_reg = &fc_regs[FC_MTIP_CMDCONFIG / 8];
+	u64 val = readq_be(cmdcfg_reg);
+
+	/* clear OFF_LINE, set ON_LINE */
+	val = (val & ~FC_MTIP_CMDCONFIG_OFFLINE) | FC_MTIP_CMDCONFIG_ONLINE;
+
+	writeq_be(val, cmdcfg_reg);
+}
+
+/**
+ * set_port_offline() - transitions the specified host FC port to offline state
+ * @fc_regs:	Top of MMIO region defined for specified port.
+ *
+ * The provided MMIO region must be mapped prior to call.
+ *
+ * Performs a read-modify-write of the FC_MTIP_CMDCONFIG register.
+ */
+static void set_port_offline(volatile u64 *fc_regs)
+{
+	volatile u64 *cmdcfg_reg = &fc_regs[FC_MTIP_CMDCONFIG / 8];
+	u64 val = readq_be(cmdcfg_reg);
+
+	/* clear ON_LINE, set OFF_LINE */
+	val = (val & ~FC_MTIP_CMDCONFIG_ONLINE) | FC_MTIP_CMDCONFIG_OFFLINE;
+
+	writeq_be(val, cmdcfg_reg);
+}
+
+/**
+ * wait_port_online() - waits for the specified host FC port come online
+ * @fc_regs:	Top of MMIO region defined for specified port.
+ * @delay_us:	Number of microseconds to delay between reading port status.
+ * @nretry:	Number of cycles to retry reading port status.
+ *
+ * The provided MMIO region must be mapped prior to call. This will timeout
+ * when the cable is not plugged in. The status is polled once and then up
+ * to @nretry more times, sleeping before every read.
+ *
+ * Return:
+ *	TRUE (1) when the specified port is online
+ *	FALSE (0) when the specified port fails to come online after timeout
+ *	-EINVAL when @delay_us is less than 1000
+ */
+static int wait_port_online(volatile u64 *fc_regs,
+			    useconds_t delay_us, unsigned int nretry)
+{
+	bool online;
+	u64 status;
+
+	/* msleep() works in milliseconds; anything shorter is rejected */
+	if (delay_us < 1000) {
+		pr_err("%s: invalid delay specified %d\n", __func__, delay_us);
+		return -EINVAL;
+	}
+
+	do {
+		msleep(delay_us / 1000);
+		status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]);
+		online = ((status & FC_MTIP_STATUS_MASK) ==
+			  FC_MTIP_STATUS_ONLINE);
+	} while (!online && nretry--);
+
+	return online;
+}
+
+/**
+ * wait_port_offline() - waits for the specified host FC port go offline
+ * @fc_regs:	Top of MMIO region defined for specified port.
+ * @delay_us:	Number of microseconds to delay between reading port status.
+ * @nretry:	Number of cycles to retry reading port status.
+ *
+ * The provided MMIO region must be mapped prior to call. The status is
+ * polled once and then up to @nretry more times, sleeping before every
+ * read.
+ *
+ * Return:
+ *	TRUE (1) when the specified port is offline
+ *	FALSE (0) when the specified port fails to go offline after timeout
+ *	-EINVAL when @delay_us is less than 1000
+ */
+static int wait_port_offline(volatile u64 *fc_regs,
+			     useconds_t delay_us, unsigned int nretry)
+{
+	bool offline;
+	u64 status;
+
+	/* msleep() works in milliseconds; anything shorter is rejected */
+	if (delay_us < 1000) {
+		pr_err("%s: invalid delay specified %d\n", __func__, delay_us);
+		return -EINVAL;
+	}
+
+	do {
+		msleep(delay_us / 1000);
+		status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]);
+		offline = ((status & FC_MTIP_STATUS_MASK) ==
+			   FC_MTIP_STATUS_OFFLINE);
+	} while (!offline && nretry--);
+
+	return offline;
+}
+
+/**
+ * afu_set_wwpn() - configures the WWPN for the specified host FC port
+ * @afu:	AFU associated with the host that owns the specified FC port.
+ * @port:	Port number being configured.
+ * @fc_regs:	Top of MMIO region defined for specified port.
+ * @wwpn:	The world-wide-port-number previously discovered for port.
+ *
+ * The provided MMIO region must be mapped prior to call. As part of the
+ * sequence to configure the WWPN, the port is toggled offline and then back
+ * online. This toggling action can cause this routine to delay up to a few
+ * seconds. The WWPN is only written when the port actually went offline;
+ * the port is brought back online either way. When configured to use the
+ * internal LUN feature of the AFU, a failure to come online is overridden.
+ *
+ * Return:
+ *	0 when the WWPN is successfully written and the port comes back online
+ *	-1 when the port fails to go offline or come back up online
+ */
+static int afu_set_wwpn(struct afu *afu, int port,
+			volatile u64 *fc_regs, u64 wwpn)
+{
+	int ret = 0;
+
+	set_port_offline(fc_regs);
+
+	if (wait_port_offline(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
+			      FC_PORT_STATUS_RETRY_CNT)) {
+		writeq_be(wwpn, &fc_regs[FC_PNAME / 8]);
+	} else {
+		pr_debug("%s: wait on port %d to go offline timed out\n",
+			 __func__, port);
+		/* Remember the failure but still bring the port back up */
+		ret = -1;
+	}
+
+	set_port_online(fc_regs);
+
+	if (!wait_port_online(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
+			      FC_PORT_STATUS_RETRY_CNT)) {
+		pr_debug("%s: wait on port %d to go online timed out\n",
+			 __func__, port);
+
+		if (afu->internal_lun) {
+			/* The internal LUN overrides the timeout */
+			pr_debug("%s: Overriding port %d online timeout!!!\n",
+				 __func__, port);
+			ret = 0;
+		} else {
+			ret = -1;
+		}
+	}
+
+	pr_debug("%s: returning rc=%d\n", __func__, ret);
+
+	return ret;
+}
+
+/**
+ * afu_link_reset() - resets the specified host FC port
+ * @afu:	AFU associated with the host that owns the specified FC port.
+ * @port:	Port number being configured.
+ * @fc_regs:	Top of MMIO region defined for specified port.
+ *
+ * The provided MMIO region must be mapped prior to call. The sequence to
+ * reset the port involves toggling it offline and then back online. This
+ * action can cause this routine to delay up to a few seconds. An effort
+ * is made to maintain link with the device by switching the host to use
+ * the alternate port exclusively while the reset takes place. Timeouts
+ * while toggling are logged but do not stop the sequence.
+ */
+static void afu_link_reset(struct afu *afu, int port, volatile u64 *fc_regs)
+{
+	u64 port_sel;
+	int done;
+
+	/* Take this port out of the selection mask for the duration */
+	port_sel = readq_be(&afu->afu_map->global.regs.afu_port_sel);
+	port_sel &= ~(1 << port);
+	writeq_be(port_sel, &afu->afu_map->global.regs.afu_port_sel);
+	cxlflash_afu_sync(afu, 0, 0, AFU_GSYNC);
+
+	set_port_offline(fc_regs);
+	done = wait_port_offline(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
+				 FC_PORT_STATUS_RETRY_CNT);
+	if (!done) {
+		pr_err("%s: wait on port %d to go offline timed out\n",
+		       __func__, port);
+	}
+
+	set_port_online(fc_regs);
+	done = wait_port_online(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US,
+				FC_PORT_STATUS_RETRY_CNT);
+	if (!done) {
+		pr_err("%s: wait on port %d to go online timed out\n",
+		       __func__, port);
+	}
+
+	/* Put this port back into the selection mask */
+	port_sel |= (1 << port);
+	writeq_be(port_sel, &afu->afu_map->global.regs.afu_port_sel);
+	cxlflash_afu_sync(afu, 0, 0, AFU_GSYNC);
+
+	pr_debug("%s: returning port_sel=%lld\n", __func__, port_sel);
+}
+
+/*
+ * Asynchronous interrupt information table
+ *
+ * Each entry holds, in order: the status bit, a description used in log
+ * messages, the FC port the status belongs to, and the action flags
+ * (CLR_FC_ERROR and/or LINK_RESET) to apply. find_ainfo() stops at the
+ * entry whose status is zero.
+ */
+static const struct asyc_intr_info ainfo[] = {
+	{SISL_ASTATUS_FC0_OTHER, "fc 0: other error", 0,
+		CLR_FC_ERROR | LINK_RESET},
+	{SISL_ASTATUS_FC0_LOGO, "fc 0: target initiated LOGO", 0, 0},
+	{SISL_ASTATUS_FC0_CRC_T, "fc 0: CRC threshold exceeded", 0, LINK_RESET},
+	{SISL_ASTATUS_FC0_LOGI_R, "fc 0: login timed out, retrying", 0, 0},
+	{SISL_ASTATUS_FC0_LOGI_F, "fc 0: login failed", 0, CLR_FC_ERROR},
+	{SISL_ASTATUS_FC0_LOGI_S, "fc 0: login succeeded", 0, 0},
+	{SISL_ASTATUS_FC0_LINK_DN, "fc 0: link down", 0, 0},
+	{SISL_ASTATUS_FC0_LINK_UP, "fc 0: link up", 0, 0},
+
+	{SISL_ASTATUS_FC1_OTHER, "fc 1: other error", 1,
+	 CLR_FC_ERROR | LINK_RESET},
+	{SISL_ASTATUS_FC1_LOGO, "fc 1: target initiated LOGO", 1, 0},
+	{SISL_ASTATUS_FC1_CRC_T, "fc 1: CRC threshold exceeded", 1, LINK_RESET},
+	{SISL_ASTATUS_FC1_LOGI_R, "fc 1: login timed out, retrying", 1, 0},
+	{SISL_ASTATUS_FC1_LOGI_F, "fc 1: login failed", 1, CLR_FC_ERROR},
+	{SISL_ASTATUS_FC1_LOGI_S, "fc 1: login succeeded", 1, 0},
+	{SISL_ASTATUS_FC1_LINK_DN, "fc 1: link down", 1, 0},
+	{SISL_ASTATUS_FC1_LINK_UP, "fc 1: link up", 1, 0},
+	{0x0, "", 0, 0}		/* terminator */
+};
+
+/**
+ * find_ainfo() - locates and returns asynchronous interrupt information
+ * @status:	Status code set by AFU on error.
+ *
+ * Scans the ainfo table up to its zero-status terminator for an entry
+ * whose status equals @status.
+ *
+ * Return: The located information or NULL when the status code is invalid.
+ */
+static const struct asyc_intr_info *find_ainfo(u64 status)
+{
+	int idx;
+
+	for (idx = 0; ainfo[idx].status; idx++) {
+		if (ainfo[idx].status == status)
+			return &ainfo[idx];
+	}
+
+	return NULL;
+}
+
+/**
+ * afu_err_intr_init() - clears and initializes the AFU for error interrupts
+ * @afu:	AFU associated with the host.
+ *
+ * Programs the global asynchronous interrupt registers (mask, control,
+ * clear), updates the internal LUN bits of port 0's FC_CONFIG2 register,
+ * clears the FC error registers of every port and finally sets up the
+ * synchronous interrupt control and mask in the host map.
+ */
+static void afu_err_intr_init(struct afu *afu)
+{
+	int i;
+	volatile u64 reg;
+
+	/* global async interrupts: AFU clears afu_ctrl on context exit
+	 * if async interrupts were sent to that context. This prevents
+	 * the AFU from sending further async interrupts when there is
+	 * nobody to receive them.
+	 */
+
+	/* mask all */
+	writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_mask);
+	/* set LISN# to send and point to master context */
+	reg = ((u64) (((afu->ctx_hndl << 8) | SISL_MSI_ASYNC_ERROR)) << 40);
+
+	if (afu->internal_lun)
+		reg |= 1;	/* Bit 63 indicates local lun */
+	writeq_be(reg, &afu->afu_map->global.regs.afu_ctrl);
+	/* clear all */
+	writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_clear);
+	/* unmask bits that are of interest */
+	/* note: afu can send an interrupt after this step */
+	writeq_be(SISL_ASTATUS_MASK, &afu->afu_map->global.regs.aintr_mask);
+	/* clear again in case a bit came on after previous clear but before */
+	/* unmask */
+	writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_clear);
+
+	/* Clear/Set internal lun bits */
+	reg = readq_be(&afu->afu_map->global.fc_regs[0][FC_CONFIG2 / 8]);
+	pr_debug("%s: ilun p0 = %016llX\n", __func__, reg);
+	reg &= SISL_FC_INTERNAL_MASK;
+	if (afu->internal_lun)
+		reg |= ((u64)(afu->internal_lun - 1) << SISL_FC_INTERNAL_SHIFT);
+	pr_debug("%s: ilun p0 = %016llX\n", __func__, reg);
+	writeq_be(reg, &afu->afu_map->global.fc_regs[0][FC_CONFIG2 / 8]);
+
+	/* now clear FC errors */
+	for (i = 0; i < NUM_FC_PORTS; i++) {
+		writeq_be(0xFFFFFFFFU,
+			  &afu->afu_map->global.fc_regs[i][FC_ERROR / 8]);
+		writeq_be(0, &afu->afu_map->global.fc_regs[i][FC_ERRCAP / 8]);
+	}
+
+	/* sync interrupts for master's IOARRIN write */
+	/* note that unlike asyncs, there can be no pending sync interrupts */
+	/* at this time (this is a fresh context and master has not written */
+	/* IOARRIN yet), so there is nothing to clear. */
+
+	/* set LISN#, it is always sent to the context that wrote IOARRIN */
+	writeq_be(SISL_MSI_SYNC_ERROR, &afu->host_map->ctx_ctrl);
+	writeq_be(SISL_ISTATUS_MASK, &afu->host_map->intr_mask);
+}
+
+/**
+ * cxlflash_sync_err_irq() - interrupt handler for synchronous errors
+ * @irq:	Interrupt number.
+ * @data:	Private data provided at interrupt registration, the AFU.
+ *
+ * Logs the interrupt status and, unless the interrupt turns out to be
+ * spurious, clears the bits that were found set.
+ *
+ * Return: Always return IRQ_HANDLED.
+ */
+static irqreturn_t cxlflash_sync_err_irq(int irq, void *data)
+{
+	struct afu *afu = (struct afu *)data;
+	u64 reg = readq_be(&afu->host_map->intr_status);
+	u64 reg_unmasked = (reg & SISL_ISTATUS_UNMASK);
+
+	if (reg_unmasked != 0UL) {
+		pr_err("%s: %llX: unexpected interrupt, intr_status %016llX\n",
+		       __func__, (u64)afu, reg);
+
+		writeq_be(reg_unmasked, &afu->host_map->intr_clear);
+	} else {
+		pr_err("%s: %llX: spurious interrupt, intr_status %016llX\n",
+		       __func__, (u64)afu, reg);
+	}
+
+	pr_debug("%s: returning rc=%d\n", __func__, IRQ_HANDLED);
+	return IRQ_HANDLED;
+}
+
+/**
+ * cxlflash_rrq_irq() - interrupt handler for read-response queue (normal path)
+ * @irq:	Interrupt number.
+ * @data:	Private data provided at interrupt registration, the AFU.
+ *
+ * Walks the response queue from the saved position for as long as the
+ * toggle bit of the current entry matches the expected toggle value. Each
+ * such entry, with the toggle bit masked off, is treated as a pointer to
+ * the completed AFU command and passed to cmd_complete(). On reaching the
+ * end of the queue the position wraps to the start and the expected toggle
+ * value flips. The position and toggle are saved back into the AFU before
+ * returning.
+ *
+ * Return: Always return IRQ_HANDLED.
+ */
+static irqreturn_t cxlflash_rrq_irq(int irq, void *data)
+{
+	struct afu *afu = (struct afu *)data;
+	struct afu_cmd *cmd;
+	bool toggle = afu->toggle;
+	u64 entry;
+	u64 *hrrq_start = afu->hrrq_start,
+	    *hrrq_end = afu->hrrq_end;
+	volatile u64 *hrrq_curr = afu->hrrq_curr;
+
+	/* Process however many RRQ entries that are ready */
+	while (true) {
+		entry = *hrrq_curr;
+
+		if ((entry & SISL_RESP_HANDLE_T_BIT) != toggle)
+			break;
+
+		cmd = (struct afu_cmd *)(entry & ~SISL_RESP_HANDLE_T_BIT);
+		cmd_complete(cmd);
+
+		/* Advance to next entry or wrap and flip the toggle bit */
+		if (hrrq_curr < hrrq_end)
+			hrrq_curr++;
+		else {
+			hrrq_curr = hrrq_start;
+			toggle ^= SISL_RESP_HANDLE_T_BIT;
+		}
+	}
+
+	afu->hrrq_curr = hrrq_curr;
+	afu->toggle = toggle;
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * cxlflash_async_err_irq() - interrupt handler for asynchronous errors
+ * @irq:	Interrupt number.
+ * @data:	Private data provided at interrupt registration, the AFU.
+ *
+ * Reads and clears the asynchronous interrupt status, then services every
+ * status bit that is set and has a descriptor (looked up with find_ainfo()).
+ * Depending on the descriptor's action flags a link reset is scheduled on
+ * the work queue and/or the port's FC_ERROR/FC_ERRCAP registers are cleared.
+ *
+ * Return: Always return IRQ_HANDLED.
+ */
+static irqreturn_t cxlflash_async_err_irq(int irq, void *data)
+{
+	struct afu *afu = (struct afu *)data;
+	struct cxlflash_cfg *cfg;
+	u64 reg_unmasked;
+	const struct asyc_intr_info *info;
+	volatile struct sisl_global_map *global = &afu->afu_map->global;
+	u64 reg;
+	int i;
+
+	cfg = afu->parent;
+
+	reg = readq_be(&global->regs.aintr_status);
+	reg_unmasked = (reg & SISL_ASTATUS_UNMASK);
+
+	if (reg_unmasked == 0) {
+		pr_err("%s: spurious interrupt, aintr_status 0x%016llX\n",
+		       __func__, reg);
+		goto out;
+	}
+
+	/* it is OK to clear AFU status before FC_ERROR */
+	writeq_be(reg_unmasked, &global->regs.aintr_clear);
+
+	/* check each bit that is on */
+	for (i = 0; reg_unmasked; i++, reg_unmasked = (reg_unmasked >> 1)) {
+		/* Bit i is not set: nothing to service for it */
+		if (!(reg_unmasked & 0x1))
+			continue;
+
+		/* Set bits without a descriptor are silently ignored */
+		info = find_ainfo(1ULL << i);
+		if (!info)
+			continue;
+
+		pr_err("%s: %s, fc_status 0x%08llX\n", __func__, info->desc,
+		       readq_be(&global->fc_regs[info->port][FC_STATUS / 8]));
+
+		/*
+		 * do link reset first, some OTHER errors will set FC_ERROR
+		 * again if cleared before or w/o a reset
+		 */
+		if (info->action & LINK_RESET) {
+			pr_err("%s: fc %d: resetting link\n",
+			       __func__, info->port);
+			cfg->lr_state = LINK_RESET_REQUIRED;
+			cfg->lr_port = info->port;
+			schedule_work(&cfg->work_q);
+		}
+
+		if (info->action & CLR_FC_ERROR) {
+			reg = readq_be(&global->fc_regs[info->port]
+				       [FC_ERROR / 8]);
+
+			/*
+			 * since all errors are unmasked, FC_ERROR and FC_ERRCAP
+			 * should be the same and tracing one is sufficient.
+			 */
+
+			pr_err("%s: fc %d: clearing fc_error 0x%08llX\n",
+			       __func__, info->port, reg);
+
+			/* Write back what was read to clear FC_ERROR */
+			writeq_be(reg,
+				  &global->fc_regs[info->port][FC_ERROR / 8]);
+			writeq_be(0,
+				  &global->fc_regs[info->port][FC_ERRCAP / 8]);
+		}
+	}
+
+out:
+	pr_debug("%s: returning rc=%d, afu=%p\n", __func__, IRQ_HANDLED, afu);
+	return IRQ_HANDLED;
+}
+
+/**
+ * start_context() - starts the master context
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Hands the AFU's work element descriptor to CXL services for the master
+ * context stored in @cfg.
+ *
+ * Return: A success or failure value from CXL services.
+ */
+static int start_context(struct cxlflash_cfg *cfg)
+{
+	u64 wed = cfg->afu->work.work_element_descriptor;
+	int rc;
+
+	rc = cxl_start_context(cfg->mcctx, wed, NULL);
+
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * read_vpd() - obtains the WWPNs from VPD
+ * @cfg:	Internal structure associated with the host.
+ * @wwpn:	Array of size NUM_FC_PORTS to pass back WWPNs
+ *
+ * Reads up to CXLFLASH_VPD_LEN bytes of VPD from the parent PCI device,
+ * locates the read-only section and converts the "V5"/"V6" keyword fields
+ * (one per port, WWPN_LEN ASCII hexadecimal characters each) to integers.
+ *
+ * Return:
+ *	0 on success
+ *	-ENODEV when VPD or WWPN keywords not found
+ */
+static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
+{
+	struct pci_dev *dev = cfg->parent_dev;
+	int rc = 0;
+	int ro_start, ro_size, i, j, k;
+	ssize_t vpd_size;
+	char vpd_data[CXLFLASH_VPD_LEN];
+	char tmp_buf[WWPN_BUF_LEN] = { 0 };
+	char *wwpn_vpd_tags[NUM_FC_PORTS] = { "V5", "V6" };
+
+	/* Get the VPD data from the device */
+	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
+	if (unlikely(vpd_size <= 0)) {
+		pr_err("%s: Unable to read VPD (size = %zd)\n",
+		       __func__, vpd_size);
+		rc = -ENODEV;
+		goto out;
+	}
+
+	/* Get the read only section offset */
+	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size,
+				    PCI_VPD_LRDT_RO_DATA);
+	if (unlikely(ro_start < 0)) {
+		pr_err("%s: VPD Read-only data not found\n", __func__);
+		rc = -ENODEV;
+		goto out;
+	}
+
+	/* Get the read only section size, cap when extends beyond read VPD */
+	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
+	j = ro_size;
+	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
+	if (unlikely((i + j) > vpd_size)) {
+		pr_debug("%s: Might need to read more VPD (%d > %zd)\n",
+			 __func__, (i + j), vpd_size);
+		ro_size = vpd_size - i;
+	}
+
+	/*
+	 * Find the offset of the WWPN tag within the read only
+	 * VPD data and validate the found field (partials are
+	 * no good to us). Convert the ASCII data to an integer
+	 * value. Note that we must copy to a temporary buffer
+	 * because the conversion service requires that the ASCII
+	 * string be terminated.
+	 */
+	for (k = 0; k < NUM_FC_PORTS; k++) {
+		j = ro_size;
+		i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
+
+		i = pci_vpd_find_info_keyword(vpd_data, i, j, wwpn_vpd_tags[k]);
+		if (unlikely(i < 0)) {
+			pr_err("%s: Port %d WWPN not found in VPD\n",
+			       __func__, k);
+			rc = -ENODEV;
+			goto out;
+		}
+
+		j = pci_vpd_info_field_size(&vpd_data[i]);
+		i += PCI_VPD_INFO_FLD_HDR_SIZE;
+		if (unlikely((i + j > vpd_size) || (j != WWPN_LEN))) {
+			pr_err("%s: Port %d WWPN incomplete or VPD corrupt\n",
+			       __func__, k);
+			rc = -ENODEV;
+			goto out;
+		}
+
+		/* tmp_buf[WWPN_LEN] stays 0 and terminates the string */
+		memcpy(tmp_buf, &vpd_data[i], WWPN_LEN);
+
+		/*
+		 * The field is hexadecimal text: pass base 16 explicitly
+		 * rather than the field length, and convert straight into
+		 * the 64-bit destination so no pointer cast is needed.
+		 */
+		rc = kstrtou64(tmp_buf, 16, &wwpn[k]);
+		if (unlikely(rc)) {
+			pr_err("%s: Fail to convert port %d WWPN to integer\n",
+			       __func__, k);
+			rc = -ENODEV;
+			goto out;
+		}
+	}
+
+out:
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * cxlflash_context_reset() - timeout handler for AFU commands
+ * @cmd:	AFU command that timed out.
+ *
+ * Completes @cmd, then sends a reset to the AFU by writing 0x1 to IOARRIN
+ * and polling until the register no longer reads back 0x1 or the retry
+ * budget (MC_ROOM_RETRY_CNT) is exhausted. If the AFU reports no command
+ * room after the retries, the reset is not sent and an error is logged.
+ */
+void cxlflash_context_reset(struct afu_cmd *cmd)
+{
+	int nretry = 0;
+	u64 rrin = 0x1;
+	struct afu *afu = cmd->parent;
+
+	pr_debug("%s: cmd=%p\n", __func__, cmd);
+
+	/* First process completion of the command that timed out */
+	cmd_complete(cmd);
+
+	if (afu->room == 0) {
+		do {
+			/*
+			 * We really want to send this reset at all costs, so
+			 * spread out wait time on successive retries.
+			 */
+			udelay(nretry);
+			afu->room = readq_be(&afu->host_map->cmd_room);
+		} while ((afu->room == 0) && (nretry++ < MC_ROOM_RETRY_CNT));
+	}
+
+	if (afu->room) {
+		writeq_be((u64) rrin, &afu->host_map->ioarrin);
+		do {
+			rrin = readq_be(&afu->host_map->ioarrin);
+			/*
+			 * Double delay each time: 2 to the power nretry,
+			 * written as a shift ('^' is XOR in C). nretry
+			 * carries over from the room wait above.
+			 */
+			udelay(1 << nretry);
+		} while ((rrin == 0x1) && (nretry++ < MC_ROOM_RETRY_CNT));
+	} else
+		pr_err("%s: no cmd_room to send reset\n", __func__);
+}
+
+/**
+ * init_pcr() - initialize the provisioning and control registers
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Zeroes rht_start, rht_cnt_id and ctx_cap for every context, caches the
+ * master context's handle and its host/control register maps in the AFU
+ * structure, programs the endian control register and fills in the AFU
+ * command fields that never change.
+ */
+void init_pcr(struct cxlflash_cfg *cfg)
+{
+	struct afu *afu = cfg->afu;
+	volatile struct sisl_ctrl_map *ctrl;
+	struct afu_cmd *cmd;
+	int idx;
+
+	/*
+	 * Disrupt any clients that could be running, e.g. clients that
+	 * survived a master restart.
+	 */
+	for (idx = 0; idx < MAX_CONTEXT; idx++) {
+		ctrl = &afu->afu_map->ctrls[idx].ctrl;
+		writeq_be(0, &ctrl->rht_start);
+		writeq_be(0, &ctrl->rht_cnt_id);
+		writeq_be(0, &ctrl->ctx_cap);
+	}
+
+	/* Cache frequently used fields; ctx_hndl is 16 bits in CAIA */
+	afu->ctx_hndl = (u16) cxl_process_element(cfg->mcctx);
+	afu->host_map = &afu->afu_map->hosts[afu->ctx_hndl].host;
+	afu->ctrl_map = &afu->afu_map->ctrls[afu->ctx_hndl].ctrl;
+
+	/* Program the Endian Control for the master context */
+	writeq_be((u64) SISL_ENDIAN_CTRL, &afu->host_map->endian_ctrl);
+
+	/* Command fields that stay constant for the life of the context */
+	for (idx = 0; idx < CXLFLASH_NUM_CMDS; idx++) {
+		cmd = &afu->cmd[idx];
+		cmd->rcb.ctx_id = afu->ctx_hndl;
+		cmd->rcb.msi = SISL_MSI_RRQ_UPDATED;
+		cmd->rcb.rrq = 0x0;
+	}
+}
+
+/**
+ * init_global() - initialize AFU global registers
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Reads the port WWPNs from VPD, points the AFU at the master's RRQ,
+ * sets the AFU configuration and port selection, prepares the per-port
+ * FC registers (error mask, CRC threshold, WWPN), grants the master
+ * context its capabilities and samples the initial heartbeat.
+ *
+ * Return:
+ *	0 on success
+ *	A failure value from read_vpd()
+ *	-EIO when a port WWPN could not be set
+ */
+int init_global(struct cxlflash_cfg *cfg)
+{
+	struct afu *afu = cfg->afu;
+	u64 wwpn[NUM_FC_PORTS];	/* wwpn of AFU ports */
+	u64 afu_cfg;
+	int port, port_cnt;
+	int rc;
+
+	rc = read_vpd(cfg, &wwpn[0]);
+	if (rc) {
+		pr_err("%s: could not read vpd rc=%d\n", __func__, rc);
+		return rc;
+	}
+
+	pr_debug("%s: wwpn0=0x%llX wwpn1=0x%llX\n", __func__, wwpn[0], wwpn[1]);
+
+	/* set up RRQ in AFU for master issued cmds */
+	writeq_be((u64) afu->hrrq_start, &afu->host_map->rrq_start);
+	writeq_be((u64) afu->hrrq_end, &afu->host_map->rrq_end);
+
+	/*
+	 * AFU configuration: enable all auto retry options and control
+	 * endianness, leave others at default (CTX_CAP write protected,
+	 * mbox_r does not clear on read and checker on if dual afu).
+	 */
+	afu_cfg = readq_be(&afu->afu_map->global.regs.afu_config);
+	afu_cfg |= SISL_AFUCONF_AR_ALL|SISL_AFUCONF_ENDIAN;
+	writeq_be(afu_cfg, &afu->afu_map->global.regs.afu_config);
+
+	/* global port select: an internal LUN only uses port 0 */
+	if (!afu->internal_lun) {
+		writeq_be(BOTH_PORTS, &afu->afu_map->global.regs.afu_port_sel);
+		port_cnt = NUM_FC_PORTS;
+	} else {
+		writeq_be(PORT0, &afu->afu_map->global.regs.afu_port_sel);
+		port_cnt = NUM_FC_PORTS - 1;
+	}
+
+	for (port = 0; port < port_cnt; port++) {
+		/* unmask all errors (but they are still masked at AFU) */
+		writeq_be(0,
+			  &afu->afu_map->global.fc_regs[port][FC_ERRMSK / 8]);
+
+		/* clear CRC error cnt & set a threshold */
+		(void)readq_be(&afu->afu_map->global.
+			       fc_regs[port][FC_CNT_CRCERR / 8]);
+		writeq_be(MC_CRC_THRESH, &afu->afu_map->global.fc_regs[port]
+			  [FC_CRC_THRESH / 8]);
+
+		/* set WWPNs. If already programmed, wwpn[port] is 0 */
+		if (wwpn[port] != 0) {
+			if (afu_set_wwpn(afu, port,
+					 &afu->afu_map->global.fc_regs[port][0],
+					 wwpn[port])) {
+				pr_debug("%s: failed to set WWPN on port %d\n",
+					 __func__, port);
+				return -EIO;
+			}
+		}
+
+		/*
+		 * Programming WWPN back to back causes additional
+		 * offline/online transitions and a PLOGI
+		 */
+		msleep(100);
+	}
+
+	/*
+	 * Set up master's own CTX_CAP to allow real mode, host translation
+	 * tbls, afu cmds and read/write GSCSI cmds. Reading mbox first
+	 * unlocks the ctx_cap write.
+	 */
+	(void)readq_be(&afu->ctrl_map->mbox_r);	/* unlock ctx_cap */
+	writeq_be((SISL_CTX_CAP_REAL_MODE | SISL_CTX_CAP_HOST_XLATE |
+		   SISL_CTX_CAP_READ_CMD | SISL_CTX_CAP_WRITE_CMD |
+		   SISL_CTX_CAP_AFU_CMD | SISL_CTX_CAP_GSCSI_CMD),
+		  &afu->ctrl_map->ctx_cap);
+
+	/* init heartbeat */
+	afu->hb = readq_be(&afu->afu_map->global.regs.afu_hb);
+
+	return rc;
+}
+
+/**
+ * start_afu() - initializes and starts the AFU
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Prepares the timer, lock and parent pointer of every AFU command,
+ * initializes the provisioning/control registers, resets the RRQ
+ * bookkeeping and finally programs the global registers.
+ *
+ * Return: The value returned by init_global().
+ */
+static int start_afu(struct cxlflash_cfg *cfg)
+{
+	struct afu *afu = cfg->afu;
+	struct afu_cmd *cmd;
+	int idx;
+	int rc;
+
+	for (idx = 0; idx < CXLFLASH_NUM_CMDS; idx++) {
+		cmd = &afu->cmd[idx];
+
+		/* Timer expiry runs the context reset on this command */
+		init_timer(&cmd->timer);
+		cmd->timer.data = (unsigned long)cmd;
+		cmd->timer.function = (void (*)(unsigned long))
+		    cxlflash_context_reset;
+
+		spin_lock_init(&cmd->slock);
+		cmd->parent = afu;
+	}
+
+	init_pcr(cfg);
+
+	/* initialize RRQ pointers */
+	afu->hrrq_start = &afu->rrq_entry[0];
+	afu->hrrq_end = &afu->rrq_entry[NUM_RRQ_ENTRY - 1];
+	afu->hrrq_curr = afu->hrrq_start;
+	afu->toggle = 1;
+
+	rc = init_global(cfg);
+
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * init_mc() - create and register as the master context
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Obtains a CXL context, marks it as master, resets the AFU, allocates and
+ * maps the three AFU interrupts and starts the context. On any failure
+ * after the context was obtained, term_mc() is called with the undo level
+ * matching how far initialization got.
+ *
+ * Return:
+ *	0 on success
+ *	-ENOMEM when unable to obtain a context from CXL services
+ *	-ENODEV when an interrupt could not be mapped and CXL services
+ *	gave no error code
+ *	A failure value from CXL services.
+ */
+static int init_mc(struct cxlflash_cfg *cfg)
+{
+	struct cxl_context *ctx;
+	struct device *dev = &cfg->dev->dev;
+	struct afu *afu = cfg->afu;
+	int rc = 0;
+	enum undo_level level;
+
+	ctx = cxl_get_context(cfg->dev);
+	if (!ctx)
+		return -ENOMEM;
+	cfg->mcctx = ctx;
+
+	/* Set it up as a master with the CXL */
+	cxl_set_master(ctx);
+
+	/* During initialization reset the AFU to start from a clean slate */
+	rc = cxl_afu_reset(cfg->mcctx);
+	if (rc) {
+		dev_err(dev, "%s: initial AFU reset failed rc=%d\n",
+			__func__, rc);
+		level = RELEASE_CONTEXT;
+		goto out;
+	}
+
+	rc = cxl_allocate_afu_irqs(ctx, 3);
+	if (rc) {
+		dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n",
+			__func__, rc);
+		level = RELEASE_CONTEXT;
+		goto out;
+	}
+
+	/*
+	 * A successful mapping yields a positive value. Zero and negative
+	 * values are both failures; zero is turned into an error code so
+	 * that the caller never sees success after the teardown below.
+	 */
+	rc = cxl_map_afu_irq(ctx, 1, cxlflash_sync_err_irq, afu,
+			     "SISL_MSI_SYNC_ERROR");
+	if (rc <= 0) {
+		dev_err(dev, "%s: IRQ 1 (SISL_MSI_SYNC_ERROR) map failed!\n",
+			__func__);
+		rc = rc ? rc : -ENODEV;
+		level = FREE_IRQ;
+		goto out;
+	}
+
+	rc = cxl_map_afu_irq(ctx, 2, cxlflash_rrq_irq, afu,
+			     "SISL_MSI_RRQ_UPDATED");
+	if (rc <= 0) {
+		dev_err(dev, "%s: IRQ 2 (SISL_MSI_RRQ_UPDATED) map failed!\n",
+			__func__);
+		rc = rc ? rc : -ENODEV;
+		level = UNMAP_ONE;
+		goto out;
+	}
+
+	rc = cxl_map_afu_irq(ctx, 3, cxlflash_async_err_irq, afu,
+			     "SISL_MSI_ASYNC_ERROR");
+	if (rc <= 0) {
+		dev_err(dev, "%s: IRQ 3 (SISL_MSI_ASYNC_ERROR) map failed!\n",
+			__func__);
+		rc = rc ? rc : -ENODEV;
+		level = UNMAP_TWO;
+		goto out;
+	}
+
+	/* This performs the equivalent of the CXL_IOCTL_START_WORK.
+	 * The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process
+	 * element (pe) that is embedded in the context (ctx)
+	 */
+	rc = start_context(cfg);
+	if (rc) {
+		dev_err(dev, "%s: start context failed rc=%d\n", __func__, rc);
+		level = UNMAP_THREE;
+		goto out;
+	}
+ret:
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+out:
+	/* Undo whatever was set up before the failure, then report it */
+	term_mc(cfg, level);
+	goto ret;
+}
+
+/**
+ * init_afu() - setup as master context and start AFU
+ * @cfg:	Internal structure associated with the host.
+ *
+ * This routine is a higher level of control for configuring the
+ * AFU on probe and reset paths. It registers the master context, maps
+ * the AFU MMIO space, records the AFU and interface versions, starts the
+ * AFU and, only when all of that succeeded, initializes the AFU error
+ * interrupts.
+ *
+ * Return:
+ *	0 on success
+ *	-ENOMEM when unable to map the AFU MMIO space
+ *	A failure value from internal services.
+ */
+static int init_afu(struct cxlflash_cfg *cfg)
+{
+	u64 reg;
+	int rc = 0;
+	struct afu *afu = cfg->afu;
+	struct device *dev = &cfg->dev->dev;
+
+	rc = init_mc(cfg);
+	if (rc) {
+		dev_err(dev, "%s: call to init_mc failed, rc=%d!\n",
+			__func__, rc);
+		goto err1;
+	}
+
+	/* Map the entire MMIO space of the AFU.
+	 */
+	afu->afu_map = cxl_psa_map(cfg->mcctx);
+	if (!afu->afu_map) {
+		rc = -ENOMEM;
+		term_mc(cfg, UNDO_START);
+		dev_err(dev, "%s: call to cxl_psa_map failed!\n", __func__);
+		goto err1;
+	}
+
+	/* don't byte reverse on reading afu_version, else the string form */
+	/*     will be backwards */
+	reg = afu->afu_map->global.regs.afu_version;
+	memcpy(afu->version, &reg, 8);
+	afu->interface_version =
+	    readq_be(&afu->afu_map->global.regs.interface_version);
+	pr_debug("%s: afu version %s, interface version 0x%llX\n",
+		 __func__, afu->version, afu->interface_version);
+
+	rc = start_afu(cfg);
+	if (rc) {
+		dev_err(dev, "%s: call to start_afu failed, rc=%d!\n",
+			__func__, rc);
+		term_mc(cfg, UNDO_START);
+		cxl_psa_unmap((void *)afu->afu_map);
+		afu->afu_map = NULL;
+		/* The MMIO space is gone: do not touch AFU registers */
+		goto err1;
+	}
+
+	afu_err_intr_init(cfg->afu);
+
+err1:
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * cxlflash_send_cmd() - sends an AFU command
+ * @afu:	AFU associated with the host.
+ * @cmd:	AFU command to send.
+ *
+ * When the cached command room is zero it is re-read from the AFU up to
+ * MC_ROOM_RETRY_CNT more times. The command status is then reset, the
+ * timer is armed for internal commands, and the request control block
+ * address is written to IOARRIN if there is room.
+ *
+ * Return:
+ *	0 on success
+ *	-1 on failure
+ */
+int cxlflash_send_cmd(struct afu *afu, struct afu_cmd *cmd)
+{
+	int attempt = 0;
+	int rc = 0;
+
+	/* Re-read the room count, delaying a little longer on each pass */
+	while (afu->room == 0) {
+		afu->room = readq_be(&afu->host_map->cmd_room);
+		udelay(attempt);
+		if (attempt++ >= MC_ROOM_RETRY_CNT)
+			break;
+	}
+
+	/* A host_use_b[0] of 0 means the command is active */
+	cmd->sa.host_use_b[0] = 0;
+	cmd->sa.ioasc = 0;
+
+	/* Only kick off the timer for internal commands */
+	if (!cmd->internal) {
+		if (cmd->rcb.timeout)
+			pr_err("%s: timer not started %d\n",
+			       __func__, cmd->rcb.timeout);
+	} else {
+		cmd->timer.expires = (jiffies +
+					(cmd->rcb.timeout * 2 * HZ));
+		add_timer(&cmd->timer);
+	}
+
+	/* Write IOARRIN */
+	if (!afu->room) {
+		pr_err("%s: no cmd_room to send 0x%X\n",
+		       __func__, cmd->rcb.cdb[0]);
+		rc = -1;
+	} else {
+		writeq_be((u64)&cmd->rcb, &afu->host_map->ioarrin);
+	}
+
+	pr_debug("%s: cmd=%p len=%d ea=%p rc=%d\n", __func__, cmd,
+		 cmd->rcb.data_len, (void *)cmd->rcb.data_ea, rc);
+
+	/* Let timer fire to complete the response... */
+	return rc;
+}
+
+/**
+ * cxlflash_wait_resp() - polls for a response or timeout to a sent AFU command
+ * @afu:	AFU associated with the host.
+ * @cmd:	AFU command that was sent.
+ *
+ * Spins until B_DONE is set in the command's status, stops the command
+ * timer and logs the status details when the IOASC is non-zero.
+ */
+void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd)
+{
+	for (;;) {
+		if (cmd->sa.host_use_b[0] & B_DONE)
+			break;
+		cpu_relax();
+	}
+
+	del_timer(&cmd->timer);	/* already stopped if timer fired */
+
+	if (cmd->sa.ioasc == 0)
+		return;
+
+	pr_err("%s: CMD 0x%X failed, IOASC: flags 0x%X, afu_rc 0x%X, "
+	       "scsi_rc 0x%X, fc_rc 0x%X\n", __func__, cmd->rcb.cdb[0],
+	       cmd->sa.rc.flags, cmd->sa.rc.afu_rc, cmd->sa.rc.scsi_rc,
+	       cmd->sa.rc.fc_rc);
+}
+
+/**
+ * cxlflash_afu_sync() - builds and sends an AFU sync command
+ * @afu:	AFU associated with the host.
+ * @ctx_hndl_u:	Identifies context requesting sync.
+ * @res_hndl_u:	Identifies resource requesting sync.
+ * @mode:	Type of sync to issue (lightweight, heavyweight, global).
+ *
+ * The AFU can only take 1 sync command at a time, so the routine first
+ * waits for any active sync to finish. It then checks out a command
+ * (retrying up to MC_RETRY_CNT times), fills in the sync CDB, sends it and
+ * polls for the response. The caller is responsible for any serialization.
+ *
+ * NOTE(review): the author's description says this may be called from both
+ * interrupt and process context, yet the routine can sleep in wait_event()
+ * - confirm against the callers.
+ *
+ * Return:
+ *	0 on success
+ *	-1 on failure
+ */
+int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
+		      res_hndl_t res_hndl_u, u8 mode)
+{
+	struct cxlflash_cfg *cfg = afu->parent;
+	struct afu_cmd *cmd;
+	int attempts = 0;
+	int rc = 0;
+
+	while (cfg->sync_active) {
+		pr_debug("%s: sync issued while one is active\n", __func__);
+		wait_event(cfg->sync_wait_q, !cfg->sync_active);
+	}
+
+	/* Obtain a free command, backing off longer after each miss */
+	for (;;) {
+		cmd = cxlflash_cmd_checkout(afu);
+		if (cmd)
+			break;
+
+		attempts++;
+		pr_debug("%s: could not get command on attempt %d\n",
+			 __func__, attempts);
+		udelay(1000*attempts);
+		if (attempts >= MC_RETRY_CNT) {
+			pr_err("%s: could not get a free command\n", __func__);
+			rc = -1;
+			goto out;
+		}
+	}
+
+	pr_debug("%s: afu=%p cmd=%p %d\n", __func__, afu, cmd, ctx_hndl_u);
+
+	memset(cmd->rcb.cdb, 0, sizeof(cmd->rcb.cdb));
+
+	/* Request fields; port, LUN and data are not applicable to a sync */
+	cmd->rcb.req_flags = SISL_REQ_FLAGS_AFU_CMD;
+	cmd->rcb.port_sel = 0x0;
+	cmd->rcb.lun_id = 0x0;
+	cmd->rcb.data_len = 0x0;
+	cmd->rcb.data_ea = 0x0;
+	cmd->internal = true;
+	cmd->sync = true;
+	cmd->rcb.timeout = MC_AFU_SYNC_TIMEOUT;
+
+	cmd->rcb.cdb[0] = 0xC0;	/* AFU Sync */
+	cmd->rcb.cdb[1] = mode;
+
+	cfg->sync_active = true;
+
+	/* The cdb is aligned, no unaligned accessors required */
+	*((u16 *)&cmd->rcb.cdb[2]) = swab16(ctx_hndl_u);
+	*((u32 *)&cmd->rcb.cdb[4]) = swab32(res_hndl_u);
+
+	rc = cxlflash_send_cmd(afu, cmd);
+	if (rc == 0)
+		cxlflash_wait_resp(afu, cmd);
+
+	/* B_ERROR is set on timeout */
+	if ((cmd->sa.ioasc != 0) || (cmd->sa.host_use_b[0] & B_ERROR))
+		rc = -1;
+
+out:
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * cxlflash_afu_reset() - resets the AFU
+ * @cfg:	Internal structure associated with the host.
+ *
+ * Tears the AFU down with term_afu() and brings it back up with
+ * init_afu(). The context is stopped before the reset and, since it is
+ * then no longer available, restarted once the reset is complete.
+ *
+ * Return:
+ *	0 on success
+ *	A failure value from internal services.
+ */
+int cxlflash_afu_reset(struct cxlflash_cfg *cfg)
+{
+	int rc;
+
+	term_afu(cfg);
+	rc = init_afu(cfg);
+
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+}
+
+/**
+ * cxlflash_worker_thread() - work thread handler for the AFU
+ * @work:	Work structure contained within cxlflash associated with host.
+ *
+ * Handles link reset which cannot be performed on interrupt context due to
+ * blocking up to a few seconds. When a reset has been requested, the port
+ * recorded in the configuration is reset and the request is marked
+ * complete; all of this happens with the host lock held.
+ */
+static void cxlflash_worker_thread(struct work_struct *work)
+{
+	struct cxlflash_cfg *cfg =
+	    container_of(work, struct cxlflash_cfg, work_q);
+	struct afu *afu = cfg->afu;
+	unsigned long flags;
+	int port;
+
+	spin_lock_irqsave(cfg->host->host_lock, flags);
+
+	/* Nothing to do unless a link reset was requested */
+	if (cfg->lr_state != LINK_RESET_REQUIRED)
+		goto unlock;
+
+	port = cfg->lr_port;
+	if (port >= 0)
+		afu_link_reset(afu, port,
+			       &afu->afu_map->
+			       global.fc_regs[port][0]);
+	else
+		pr_err("%s: invalid port index %d\n", __func__, port);
+
+	cfg->lr_state = LINK_RESET_COMPLETE;
+
+unlock:
+	spin_unlock_irqrestore(cfg->host->host_lock, flags);
+}
+
+/**
+ * cxlflash_probe() - PCI entry point to add host
+ * @pdev:	PCI device associated with the host.
+ * @dev_id:	PCI device id associated with device.
+ *
+ * Allocates the SCSI host and the driver's memory, initializes the
+ * configuration, then brings up the AFU, PCI and SCSI layers in that order,
+ * recording progress in init_state. A failure after the configuration has
+ * been attached to @pdev is unwound through cxlflash_remove().
+ *
+ * Return: 0 on success / negative error code on failure
+ */
+static int cxlflash_probe(struct pci_dev *pdev,
+			  const struct pci_device_id *dev_id)
+{
+	struct Scsi_Host *host;
+	struct cxlflash_cfg *cfg = NULL;
+	struct device *phys_dev;
+	struct dev_dependent_vals *ddv;
+	int rc = 0;
+
+	dev_dbg(&pdev->dev, "%s: Found CXLFLASH with IRQ: %d\n",
+		__func__, pdev->irq);
+
+	ddv = (struct dev_dependent_vals *)dev_id->driver_data;
+	driver_template.max_sectors = ddv->max_sectors;
+
+	host = scsi_host_alloc(&driver_template, sizeof(struct cxlflash_cfg));
+	if (!host) {
+		dev_err(&pdev->dev, "%s: call to scsi_host_alloc failed!\n",
+			__func__);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	host->max_id = CXLFLASH_MAX_NUM_TARGETS_PER_BUS;
+	host->max_lun = CXLFLASH_MAX_NUM_LUNS_PER_TARGET;
+	host->max_channel = NUM_FC_PORTS - 1;
+	host->unique_id = host->host_no;
+	host->max_cmd_len = CXLFLASH_MAX_CDB_LEN;
+
+	cfg = (struct cxlflash_cfg *)host->hostdata;
+	cfg->host = host;
+	rc = alloc_mem(cfg);
+	if (rc) {
+		dev_err(&pdev->dev, "%s: call to alloc_mem failed!\n",
+			__func__);
+		rc = -ENOMEM;
+		/*
+		 * The host is not yet reachable through drvdata, so drop
+		 * the reference taken by scsi_host_alloc() here.
+		 */
+		scsi_host_put(host);
+		goto out;
+	}
+
+	cfg->init_state = INIT_STATE_NONE;
+	cfg->dev = pdev;
+	cfg->dev_id = (struct pci_device_id *)dev_id;
+	cfg->tmf_active = 0;
+	cfg->mcctx = NULL;
+
+	init_waitqueue_head(&cfg->tmf_wait_q);
+	init_waitqueue_head(&cfg->eeh_wait_q);
+	init_waitqueue_head(&cfg->sync_wait_q);
+
+	INIT_WORK(&cfg->work_q, cxlflash_worker_thread);
+	cfg->lr_state = LINK_RESET_INVALID;
+	cfg->lr_port = -1;
+
+	pci_set_drvdata(pdev, cfg);
+
+	/* Use the special service provided to look up the physical
+	 * PCI device, since we are called on the probe of the virtual
+	 * PCI host bus (vphb)
+	 */
+	phys_dev = cxl_get_phys_dev(pdev);
+	if (!dev_is_pci(phys_dev)) {
+		pr_err("%s: not a pci dev\n", __func__);
+		/* Error codes are negative; a positive value is not one */
+		rc = -ENODEV;
+		goto out_remove;
+	}
+	cfg->parent_dev = to_pci_dev(phys_dev);
+
+	cfg->cxl_afu = cxl_pci_to_afu(pdev);
+	rc = init_afu(cfg);
+	if (rc) {
+		dev_err(&pdev->dev, "%s: call to init_afu failed rc=%d!\n",
+			__func__, rc);
+		goto out_remove;
+	}
+	cfg->init_state = INIT_STATE_AFU;
+
+	rc = init_pci(cfg);
+	if (rc) {
+		dev_err(&pdev->dev, "%s: call to init_pci failed rc=%d!\n",
+			__func__, rc);
+		goto out_remove;
+	}
+	cfg->init_state = INIT_STATE_PCI;
+
+	rc = init_scsi(cfg);
+	if (rc) {
+		dev_err(&pdev->dev, "%s: call to init_scsi failed rc=%d!\n",
+			__func__, rc);
+		goto out_remove;
+	}
+	cfg->init_state = INIT_STATE_SCSI;
+
+out:
+	pr_debug("%s: returning rc=%d\n", __func__, rc);
+	return rc;
+
+out_remove:
+	cxlflash_remove(pdev);
+	goto out;
+}
+
+/*
+ * PCI device structure: ties the device IDs in cxlflash_pci_table to the
+ * cxlflash_probe() and cxlflash_remove() entry points.
+ */
+static struct pci_driver cxlflash_driver = {
+	.name = CXLFLASH_NAME,
+	.id_table = cxlflash_pci_table,
+	.probe = cxlflash_probe,
+	.remove = cxlflash_remove,
+};
+
+/**
+ * init_cxlflash() - module entry point
+ *
+ * Announces the driver together with its date string and registers the
+ * PCI driver.
+ *
+ * Return: 0 on success / non-zero on failure
+ */
+static int __init init_cxlflash(void)
+{
+	int rc;
+
+	pr_info("%s: IBM Power CXL Flash Adapter: %s\n",
+		__func__, CXLFLASH_DRIVER_DATE);
+
+	rc = pci_register_driver(&cxlflash_driver);
+	return rc;
+}
+
+/**
+ * exit_cxlflash() - module exit point
+ *
+ * Unregisters the PCI driver that init_cxlflash() registered.
+ */
+static void __exit exit_cxlflash(void)
+{
+	pci_unregister_driver(&cxlflash_driver);
+}
+
+module_init(init_cxlflash);
+module_exit(exit_cxlflash);
diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h
new file mode 100644
index 0000000..014ecb8
--- /dev/null
+++ b/drivers/scsi/cxlflash/main.h
@@ -0,0 +1,111 @@ 
+/*
+ * CXL Flash Device Driver
+ *
+ * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
+ *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
+ *
+ * Copyright (C) 2015 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _CXLFLASH_MAIN_H
+#define _CXLFLASH_MAIN_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+
+typedef unsigned int useconds_t;        /* time in microseconds */
+
+#define CXLFLASH_NAME                      "cxlflash"
+#define CXLFLASH_ADAPTER_NAME              "IBM POWER CXL Flash Adapter"
+#define CXLFLASH_DRIVER_DATE              "(May 15, 2015)"
+
+#define PCI_DEVICE_ID_IBM_CORSA		0x04F0
+#define CXLFLASH_SUBS_DEV_ID		0x04F0
+
+/* Since there is only one target, make it 0 */
+#define CXLFLASH_TARGET                   0x0
+#define CXLFLASH_MAX_CDB_LEN		16	/* becomes the host's max_cmd_len */
+
+/* Really only one target per bus since the Texan is directly attached */
+/* (these two become the host's max_id and max_lun at probe time) */
+#define CXLFLASH_MAX_NUM_TARGETS_PER_BUS                     1
+#define CXLFLASH_MAX_NUM_LUNS_PER_TARGET                     65536
+
+#define CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT  (120 * HZ)
+
+#define NUM_FC_PORTS     CXLFLASH_NUM_FC_PORTS  /* ports per AFU */
+
+/* FC defines: register byte offsets; users index fc_regs[port][OFFSET / 8] */
+#define FC_MTIP_CMDCONFIG 0x010
+#define FC_MTIP_STATUS 0x018
+
+#define FC_PNAME 0x300
+#define FC_CONFIG 0x320
+#define FC_CONFIG2 0x328
+#define FC_STATUS 0x330
+#define FC_ERROR 0x380
+#define FC_ERRCAP 0x388
+#define FC_ERRMSK 0x390
+#define FC_CNT_CRCERR 0x538
+#define FC_CRC_THRESH 0x580
+
+#define FC_MTIP_CMDCONFIG_ONLINE    0x20ull
+#define FC_MTIP_CMDCONFIG_OFFLINE   0x40ull
+
+#define FC_MTIP_STATUS_MASK         0x30ull
+#define FC_MTIP_STATUS_ONLINE       0x20ull
+#define FC_MTIP_STATUS_OFFLINE      0x10ull
+
+/* TIMEOUT and RETRY definitions */
+
+/* AFU command timeout values (the command timer is armed for twice this) */
+#define MC_AFU_SYNC_TIMEOUT  5	/* 5 secs */
+
+/* AFU command room retry limit: extra reads of cmd_room before giving up */
+#define MC_ROOM_RETRY_CNT    10
+
+/* FC CRC clear periodic timer */
+#define MC_CRC_THRESH 100	/* threshold in 5 mins */
+
+#define FC_PORT_STATUS_RETRY_CNT 100	/* 100 100ms retries = 10 seconds */
+#define FC_PORT_STATUS_RETRY_INTERVAL_US 100000	/* microseconds */
+
+/* VPD defines: bytes of VPD read, WWPN text length, and length with NUL */
+#define CXLFLASH_VPD_LEN	256
+#define WWPN_LEN	16
+#define WWPN_BUF_LEN	(WWPN_LEN + 1)
+
+enum undo_level {	/* how far master context setup got; passed to term_mc() */
+	RELEASE_CONTEXT = 0,	/* AFU reset or IRQ allocation failed */
+	FREE_IRQ,		/* mapping of IRQ 1 failed */
+	UNMAP_ONE,		/* mapping of IRQ 2 failed */
+	UNMAP_TWO,		/* mapping of IRQ 3 failed */
+	UNMAP_THREE,		/* starting the context failed */
+	UNDO_START		/* failure after the context was started */
+};
+
+struct dev_dependent_vals {	/* per-device data behind pci_device_id.driver_data */
+	u64 max_sectors;	/* copied into the SCSI host template at probe */
+};
+
+struct asyc_intr_info {	/* describes how to handle one asynchronous interrupt */
+	u64 status;	/* presumably the status bit matched by find_ainfo() - confirm */
+	char *desc;	/* text logged when the interrupt is serviced */
+	u8 port;	/* FC port index, selects fc_regs[port] */
+	u8 action;	/* bitmask of the flags below */
+#define CLR_FC_ERROR   0x01	/* clear the port's FC_ERROR and FC_ERRCAP */
+#define LINK_RESET     0x02	/* schedule a link reset for the port */
+};
+
+/*
+ * Externs and Prototypes
+ */
+int cxlflash_ioctl(struct scsi_device *, int, void __user *);
+
+#endif /* _CXLFLASH_MAIN_H */
diff --git a/drivers/scsi/cxlflash/sislite.h b/drivers/scsi/cxlflash/sislite.h
new file mode 100755
index 0000000..aeccf63
--- /dev/null
+++ b/drivers/scsi/cxlflash/sislite.h
@@ -0,0 +1,465 @@ 
+/*
+ * CXL Flash Device Driver
+ *
+ * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
+ *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
+ *
+ * Copyright (C) 2015 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _SISLITE_H
+#define _SISLITE_H
+
+#include <linux/types.h>
+
+typedef u16 ctx_hndl_t;	/* context handle; same width as sisl_ioarcb.ctx_id */
+typedef u32 res_hndl_t;	/* resource handle; same width as sisl_ioarcb.res_hndl */
+
+#define SIZE_4K		4096	/* bytes */
+#define SIZE_64K	65536	/* bytes */
+
+/*
+ * IOARCB: 64 bytes, min 16 byte alignment required, host native endianness
+ * except for SCSI CDB which remains big endian per SCSI standards.
+ */
+struct sisl_ioarcb {
+	u16 ctx_id;		/* ctx_hndl_t */
+	u16 req_flags;
+#define SISL_REQ_FLAGS_RES_HNDL       0x8000u	/* bit 0 (MSB) */
+#define SISL_REQ_FLAGS_PORT_LUN_ID    0x0000u
+
+#define SISL_REQ_FLAGS_SUP_UNDERRUN   0x4000u	/* bit 1 */
+
+#define SISL_REQ_FLAGS_TIMEOUT_SECS   0x0000u	/* bits 8,9: unit of 'timeout' */
+#define SISL_REQ_FLAGS_TIMEOUT_MSECS  0x0040u
+#define SISL_REQ_FLAGS_TIMEOUT_USECS  0x0080u
+#define SISL_REQ_FLAGS_TIMEOUT_CYCLES 0x00C0u
+
+#define SISL_REQ_FLAGS_TMF_CMD        0x0004u	/* bit 13 */
+
+#define SISL_REQ_FLAGS_AFU_CMD        0x0002u	/* bit 14 */
+
+#define SISL_REQ_FLAGS_HOST_WRITE     0x0001u	/* bit 15 (LSB) */
+#define SISL_REQ_FLAGS_HOST_READ      0x0000u
+
+	union {
+		u32 res_hndl;	/* res_hndl_t */
+		u32 port_sel;	/* this is a selection mask:
+				 * 0x1 -> port#0 can be selected,
+				 * 0x2 -> port#1 can be selected.
+				 * Can be bitwise ORed.
+				 */
+	};
+	u64 lun_id;
+	u32 data_len;		/* 4K for read/write */
+	u32 ioadl_len;
+	union {
+		u64 data_ea;	/* min 16 byte aligned */
+		u64 ioadl_ea;
+	};
+	u8 msi;			/* LISN to send on RRQ write */
+#define SISL_MSI_CXL_PFAULT        0	/* reserved for CXL page faults */
+#define SISL_MSI_SYNC_ERROR        1	/* recommended for AFU sync error */
+#define SISL_MSI_RRQ_UPDATED       2	/* recommended for IO completion */
+#define SISL_MSI_ASYNC_ERROR       3	/* master only - for AFU async error */
+
+	u8 rrq;			/* 0 for a single RRQ */
+	u16 timeout;		/* in units specified by req_flags */
+	u32 rsvd1;		/* reserved */
+	u8 cdb[16];		/* must be in big endian */
+	struct scsi_cmnd *scp;	/* host pointer; 64-byte total assumes 8-byte pointers */
+};
+
+struct sisl_rc {
+	u8 flags;
+#define SISL_RC_FLAGS_SENSE_VALID         0x80u
+#define SISL_RC_FLAGS_FCP_RSP_CODE_VALID  0x40u
+#define SISL_RC_FLAGS_OVERRUN             0x20u
+#define SISL_RC_FLAGS_UNDERRUN            0x10u
+
+	u8 afu_rc;
+#define SISL_AFU_RC_RHT_INVALID           0x01u	/* user error */
+#define SISL_AFU_RC_RHT_UNALIGNED         0x02u	/* should never happen */
+#define SISL_AFU_RC_RHT_OUT_OF_BOUNDS     0x03u	/* user error */
+#define SISL_AFU_RC_RHT_DMA_ERR           0x04u	/* see afu_extra
+						   may retry if afu_retry is off
+						   possible on master exit
+						 */
+#define SISL_AFU_RC_RHT_RW_PERM           0x05u	/* no RW perms, user error */
+#define SISL_AFU_RC_LXT_UNALIGNED         0x12u	/* should never happen */
+#define SISL_AFU_RC_LXT_OUT_OF_BOUNDS     0x13u	/* user error */
+#define SISL_AFU_RC_LXT_DMA_ERR           0x14u	/* see afu_extra
+						   may retry if afu_retry is off
+						   possible on master exit
+						 */
+#define SISL_AFU_RC_LXT_RW_PERM           0x15u	/* no RW perms, user error */
+
+#define SISL_AFU_RC_NOT_XLATE_HOST        0x1au	/* possible if master exited */
+
+	/* NO_CHANNELS means the FC ports selected by dest_port in
+	 * IOARCB or in the LXT entry are down when the AFU tried to select
+	 * a FC port. If the port went down on an active IO, fc_rc is set
+	 * to 0x54 (NOLOGI) or 0x57 (LINKDOWN) instead.
+	 */
+#define SISL_AFU_RC_NO_CHANNELS           0x20u	/* see afu_extra, may retry */
+#define SISL_AFU_RC_CAP_VIOLATION         0x21u	/* either user error or
+						   afu reset/master restart
+						 */
+#define SISL_AFU_RC_OUT_OF_DATA_BUFS      0x30u	/* always retry */
+#define SISL_AFU_RC_DATA_DMA_ERR          0x31u	/* see afu_extra
+						   may retry if afu_retry is off
+						 */
+
+	u8 scsi_rc;		/* SCSI status byte, retry as appropriate */
+#define SISL_SCSI_RC_CHECK                0x02u
+#define SISL_SCSI_RC_BUSY                 0x08u
+
+	u8 fc_rc;		/* retry */
+	/*
+	 * We should only see fc_rc=0x57 (LINKDOWN) or 0x54 (NOLOGI) for
+	 * commands that are in flight when a link goes down or is logged out.
+	 * If the link is down or logged out before AFU selects the port, either
+	 * it will choose the other port or we will get afu_rc=0x20 (no_channel)
+	 * if there is no valid port to use.
+	 *
+	 * ABORTPEND/ABORTOK/ABORTFAIL/TGTABORT can be retried, typically these
+	 * would happen if a frame is dropped and something times out.
+	 * NOLOGI or LINKDOWN can be retried if the other port is up.
+	 * RESIDERR can be retried as well.
+	 *
+	 * ABORTFAIL might indicate that lots of frames are getting CRC errors.
+	 * So it may be retried once, and the link reset if it happens again.
+	 * The link can also be reset on the CRC error threshold interrupt.
+	 */
+#define SISL_FC_RC_ABORTPEND	0x52	/* exchange timeout or abort request */
+#define SISL_FC_RC_WRABORTPEND	0x53	/* due to write XFER_RDY invalid */
+#define SISL_FC_RC_NOLOGI	0x54	/* port not logged in, in-flight cmds */
+#define SISL_FC_RC_NOEXP	0x55	/* FC protocol error or HW bug */
+#define SISL_FC_RC_INUSE	0x56	/* tag already in use, HW bug */
+#define SISL_FC_RC_LINKDOWN	0x57	/* link down, in-flight cmds */
+#define SISL_FC_RC_ABORTOK	0x58	/* pending abort completed w/success */
+#define SISL_FC_RC_ABORTFAIL	0x59	/* pending abort completed w/fail */
+#define SISL_FC_RC_RESID	0x5A	/* ioasa underrun/overrun flags set */
+#define SISL_FC_RC_RESIDERR	0x5B	/* actual data len does not match SCSI
+					   reported len, possibly due to dropped
+					   frames */
+#define SISL_FC_RC_TGTABORT	0x5C	/* command aborted by target */
+};
+
+#define SISL_SENSE_DATA_LEN     20	/* Sense data length in bytes */
+
+/*
+ * IOASA: 64 bytes & must follow IOARCB, min 16 byte alignment required,
+ * host native endianness
+ */
+struct sisl_ioasa {
+	union {
+		struct sisl_rc rc;
+		u32 ioasc;	/* the four sisl_rc bytes viewed as one word */
+#define SISL_IOASC_GOOD_COMPLETION        0x00000000u
+	};
+	u32 resid;
+	u8 port;
+	u8 afu_extra;
+	/* when afu_rc=0x04, 0x14, 0x31 (_xxx_DMA_ERR):
+	 * afu_extra contains PSL response code. Useful codes are:
+	 */
+#define SISL_AFU_DMA_ERR_PAGE_IN	0x0A	/* AFU_retry_on_pagein Action
+						 *  Enabled            N/A
+						 *  Disabled           retry
+						 */
+#define SISL_AFU_DMA_ERR_INVALID_EA	0x0B	/* this is a hard error
+						 * afu_rc	Implies
+						 * 0x04, 0x14	master exit.
+						 * 0x31         user error.
+						 */
+	/* when afu_rc=0x20 (no channels):
+	 * afu_extra bits [4:5]: available portmask,  [6:7]: requested portmask.
+	 * NOTE(review): "CLANNELS" below looks like a typo of CHANNELS; names kept. */
+#define SISL_AFU_NO_CLANNELS_AMASK(afu_extra) (((afu_extra) & 0x0C) >> 2)
+#define SISL_AFU_NO_CLANNELS_RMASK(afu_extra) ((afu_extra) & 0x03)
+
+	u8 scsi_extra;
+	u8 fc_extra;
+	u8 sense_data[SISL_SENSE_DATA_LEN];
+
+	/* These fields are defined by the SISlite architecture for the
+	 * host to use as they see fit for their implementation.
+	 */
+	union {
+		u64 host_use[4];
+		u8 host_use_b[32];	/* byte view of the same 32 bytes */
+	};
+};
+
+#define SISL_RESP_HANDLE_T_BIT        0x1ull	/* Toggle bit */
+
+/* MMIO space is required to support only 64-bit access */
+
+/*
+ * This AFU has two mechanisms to deal with endianness.
+ * One is a global configuration (in the afu_config register
+ * below) that specifies the endianness of the host.
+ * The other is a per context (i.e. application) specification
+ * controlled by the endian_ctrl field here. Since the master
+ * context is one such application, the master context's
+ * endianness is set to be the same as the host's.
+ *
+ * As per the SISlite spec, the MMIO registers are always
+ * big endian.
+ */
+#define SISL_ENDIAN_CTRL_BE           0x8000000000000080ull
+#define SISL_ENDIAN_CTRL_LE           0x0000000000000000ull
+
+#ifdef __BIG_ENDIAN
+#define SISL_ENDIAN_CTRL              SISL_ENDIAN_CTRL_BE
+#else	/* !__BIG_ENDIAN */
+#define SISL_ENDIAN_CTRL              SISL_ENDIAN_CTRL_LE
+#endif	/* __BIG_ENDIAN */
+
+/* per context host transport MMIO  */
+struct sisl_host_map {
+	__be64 endian_ctrl;     /* Per context Endian Control. The AFU will
+			      * operate on whatever the context is of the
+			      * host application.
+			      */
+
+	__be64 intr_status;	/* this sends LISN# programmed in ctx_ctrl.
+				 * Only recovery in a PERM_ERR is a context
+				 * exit since there is no way to tell which
+				 * command caused the error.
+				 */
+#define SISL_ISTATUS_PERM_ERR_CMDROOM    0x0010ull	/* b59, user error */
+#define SISL_ISTATUS_PERM_ERR_RCB_READ   0x0008ull	/* b60, user error */
+#define SISL_ISTATUS_PERM_ERR_SA_WRITE   0x0004ull	/* b61, user error */
+#define SISL_ISTATUS_PERM_ERR_RRQ_WRITE  0x0002ull	/* b62, user error */
+	/* Page in wait accessing RCB/IOASA/RRQ is reported in b63.
+	 * Same error in data/LXT/RHT access is reported via IOASA.
+	 */
+#define SISL_ISTATUS_TEMP_ERR_PAGEIN     0x0001ull	/* b63, can be generated
+							 * only when AFU auto
+							 * retry is disabled.
+							 * If user can determine
+							 * the command that
+							 * caused the error, it
+							 * can be retried.
+							 */
+#define SISL_ISTATUS_UNMASK  (0x001Full)	/* 1 means unmasked */
+#define SISL_ISTATUS_MASK    ~(SISL_ISTATUS_UNMASK)	/* 1 means masked */
+
+	__be64 intr_clear;
+	__be64 intr_mask;
+	__be64 ioarrin;		/* only write what cmd_room permits */
+	__be64 rrq_start;	/* start & end are both inclusive */
+	__be64 rrq_end;		/* write sequence: start followed by end */
+	__be64 cmd_room;
+	__be64 ctx_ctrl;	/* least significant byte or b56:63 is LISN# */
+	__be64 mbox_w;		/* restricted use */
+};
+
+/* per context provisioning & control MMIO */
+struct sisl_ctrl_map {
+	__be64 rht_start;
+	__be64 rht_cnt_id;
+	/* cnt goes to bits 48 and up, ctx_id to bits 32 and up; args widened to u64 */
+#define SISL_RHT_CNT_ID(cnt, ctx_id) \
+	(((u64)(cnt) << 48) | ((u64)(ctx_id) << 32))
+	__be64 ctx_cap;	/* afu_rc below is when the capability is violated */
+#define SISL_CTX_CAP_PROXY_ISSUE       0x8000000000000000ull /* afu_rc 0x21 */
+#define SISL_CTX_CAP_REAL_MODE         0x4000000000000000ull /* afu_rc 0x21 */
+#define SISL_CTX_CAP_HOST_XLATE        0x2000000000000000ull /* afu_rc 0x1a */
+#define SISL_CTX_CAP_PROXY_TARGET      0x1000000000000000ull /* afu_rc 0x21 */
+#define SISL_CTX_CAP_AFU_CMD           0x0000000000000008ull /* afu_rc 0x21 */
+#define SISL_CTX_CAP_GSCSI_CMD         0x0000000000000004ull /* afu_rc 0x21 */
+#define SISL_CTX_CAP_WRITE_CMD         0x0000000000000002ull /* afu_rc 0x21 */
+#define SISL_CTX_CAP_READ_CMD          0x0000000000000001ull /* afu_rc 0x21 */
+	__be64 mbox_r;
+};
+
+/* single copy global regs */
+struct sisl_global_regs {
+	__be64 aintr_status;
+	/* In cxlflash, each FC port/link gets a byte of status */
+#define SISL_ASTATUS_FC0_OTHER	 0x8000ull /* b48, other err,
+					      FC_ERRCAP[31:20] */
+#define SISL_ASTATUS_FC0_LOGO    0x4000ull /* b49, target sent FLOGI/PLOGI/LOGO
+						   while logged in */
+#define SISL_ASTATUS_FC0_CRC_T   0x2000ull /* b50, CRC threshold exceeded */
+#define SISL_ASTATUS_FC0_LOGI_R  0x1000ull /* b51, login state machine timed out
+						   and retrying */
+#define SISL_ASTATUS_FC0_LOGI_F  0x0800ull /* b52, login failed,
+					      FC_ERROR[19:0] */
+#define SISL_ASTATUS_FC0_LOGI_S  0x0400ull /* b53, login succeeded */
+#define SISL_ASTATUS_FC0_LINK_DN 0x0200ull /* b54, link online to offline */
+#define SISL_ASTATUS_FC0_LINK_UP 0x0100ull /* b55, link offline to online */
+
+#define SISL_ASTATUS_FC1_OTHER   0x0080ull /* b56 */
+#define SISL_ASTATUS_FC1_LOGO    0x0040ull /* b57 */
+#define SISL_ASTATUS_FC1_CRC_T   0x0020ull /* b58 */
+#define SISL_ASTATUS_FC1_LOGI_R  0x0010ull /* b59 */
+#define SISL_ASTATUS_FC1_LOGI_F  0x0008ull /* b60 */
+#define SISL_ASTATUS_FC1_LOGI_S  0x0004ull /* b61 */
+#define SISL_ASTATUS_FC1_LINK_DN 0x0002ull /* b62 */
+#define SISL_ASTATUS_FC1_LINK_UP 0x0001ull /* b63 */
+
+#define SISL_FC_INTERNAL_UNMASK	0x0000000300000000ull	/* 1 means unmasked */
+#define SISL_FC_INTERNAL_MASK	~(SISL_FC_INTERNAL_UNMASK)
+#define SISL_FC_INTERNAL_SHIFT	32
+
+#define SISL_ASTATUS_UNMASK	0xFFFFull		/* 1 means unmasked */
+#define SISL_ASTATUS_MASK	~(SISL_ASTATUS_UNMASK)	/* 1 means masked */
+
+	__be64 aintr_clear;
+	__be64 aintr_mask;
+	__be64 afu_ctrl;
+	__be64 afu_hb;
+	__be64 afu_scratch_pad;
+	__be64 afu_port_sel;
+#define SISL_AFUCONF_AR_IOARCB	0x4000ull
+#define SISL_AFUCONF_AR_LXT	0x2000ull
+#define SISL_AFUCONF_AR_RHT	0x1000ull
+#define SISL_AFUCONF_AR_DATA	0x0800ull
+#define SISL_AFUCONF_AR_RSRC	0x0400ull
+#define SISL_AFUCONF_AR_IOASA	0x0200ull
+#define SISL_AFUCONF_AR_RRQ	0x0100ull
+/* Aggregate all Auto Retry Bits */
+#define SISL_AFUCONF_AR_ALL	(SISL_AFUCONF_AR_IOARCB|SISL_AFUCONF_AR_LXT| \
+				 SISL_AFUCONF_AR_RHT|SISL_AFUCONF_AR_DATA|   \
+				 SISL_AFUCONF_AR_RSRC|SISL_AFUCONF_AR_IOASA| \
+				 SISL_AFUCONF_AR_RRQ)
+#ifdef __BIG_ENDIAN
+#define SISL_AFUCONF_ENDIAN            0x0000ull
+#else	/* !__BIG_ENDIAN */
+#define SISL_AFUCONF_ENDIAN            0x0020ull
+#endif	/* __BIG_ENDIAN */
+#define SISL_AFUCONF_MBOX_CLR_READ     0x0010ull
+	__be64 afu_config;	/* SISL_AFUCONF_* above presumably apply here; confirm */
+	__be64 rsvd[0xf8];	/* pad: 8 regs + 0xf8 words puts afu_version at 0x800 */
+	__be64 afu_version;
+	__be64 interface_version;
+};
+
+#define CXLFLASH_NUM_FC_PORTS   2
+#define CXLFLASH_MAX_CONTEXT  512	/* how many contexts per afu */
+#define CXLFLASH_NUM_VLUNS    512
+
+struct sisl_global_map {
+	union {
+		struct sisl_global_regs regs;
+		char page0[SIZE_4K];	/* page 0: pads regs out to a full 4K */
+	};
+
+	char page1[SIZE_4K];	/* page 1 */
+
+	/* pages 2 & 3: 512 x 8 bytes = one 4K page per FC port */
+	__be64 fc_regs[CXLFLASH_NUM_FC_PORTS][CXLFLASH_NUM_VLUNS];
+
+	/* pages 4 & 5 (lun tbl): one 4K page per FC port */
+	__be64 fc_port[CXLFLASH_NUM_FC_PORTS][CXLFLASH_NUM_VLUNS];
+
+};
+
+/*
+ * CXL Flash Memory Map
+ *
+ *	+-------------------------------+
+ *	|    512 * 64 KB User MMIO      |
+ *	|        (per context)          |
+ *	|       User Accessible         |
+ *	+-------------------------------+
+ *	|    512 * 128 B per context    |
+ *	|    Provisioning and Control   |
+ *	|   Trusted Process accessible  |
+ *	+-------------------------------+
+ *	|         64 KB Global          |
+ *	|   Trusted Process accessible  |
+ *	+-------------------------------+
+ */
+struct cxlflash_afu_map {
+	union {
+		struct sisl_host_map host;
+		char harea[SIZE_64K];	/* 64KB each */
+	} hosts[CXLFLASH_MAX_CONTEXT];
+
+	union {
+		struct sisl_ctrl_map ctrl;
+		char carea[128];	/* 128B each: fixed by the map above, not by the host cache line */
+	} ctrls[CXLFLASH_MAX_CONTEXT];
+
+	union {
+		struct sisl_global_map global;
+		char garea[SIZE_64K];	/* 64KB single block */
+	};
+};
+
+/* LBA translation control blocks */
+
+struct sisl_lxt_entry {
+	u64 rlba_base;	/* bits 0:47 is base
+				 * b48:55 is lun index
+				 * b58:59 is write & read perms
+				 * (if no perm, afu_rc=0x15, SISL_AFU_RC_LXT_RW_PERM)
+				 * b60:63 is port_sel mask
+				 */
+	/* NOTE(review): b56:57 are not described above; confirm against the spec */
+};
+
+/* Per the SISlite spec, RHT entries are to be 16-byte aligned */
+struct sisl_rht_entry {
+	struct sisl_lxt_entry *lxt_start;	/* host pointer; fields total 16 bytes if it is 8 */
+	u32 lxt_cnt;
+	u16 rsvd;		/* reserved */
+	u8 fp;			/* format & perm nibbles.
+				 * (if no perm, afu_rc=0x05, SISL_AFU_RC_RHT_RW_PERM)
+				 */
+	u8 nmask;
+} __aligned(16);
+
+struct sisl_rht_entry_f1 {	/* 16-byte aligned, like struct sisl_rht_entry */
+	u64 lun_id;
+	union {
+		struct {
+			u8 valid;
+			u8 rsvd[5];	/* reserved */
+			u8 fp;		/* NOTE(review): presumably built with SISL_RHT_FP(); confirm */
+			u8 port_sel;
+		};
+
+		u64 dw;		/* the eight bytes above viewed as one 64-bit word */
+	};
+} __aligned(16);
+
+/* make the fp byte: format in the high nibble, perm in the low nibble */
+#define SISL_RHT_FP(fmt, perm) (((fmt) << 4) | (perm))
+
+/* make the fp byte for a clone: src_fp keeps its upper six bits and has its
+ * two low bits ANDed with cln_flags; cln_flags must use only the 2 LSB bits.
+ */
+#define SISL_RHT_FP_CLONE(src_fp, cln_flags) ((src_fp) & (0xFC | (cln_flags)))
+
+#define RHT_PERM_READ  0x01u
+#define RHT_PERM_WRITE 0x02u
+#define RHT_PERM_RW    (RHT_PERM_READ | RHT_PERM_WRITE)
+
+/* extract the perm bits (the two low bits) from a fp */
+#define SISL_RHT_PERM(fp) ((fp) & RHT_PERM_RW)
+
+#define PORT0  0x01u	/* same value as the port#0 bit of sisl_ioarcb.port_sel */
+#define PORT1  0x02u	/* same value as the port#1 bit of sisl_ioarcb.port_sel */
+#define BOTH_PORTS    (PORT0 | PORT1)
+
+/* AFU Sync Mode byte */
+#define AFU_LW_SYNC 0x0u
+#define AFU_HW_SYNC 0x1u
+#define AFU_GSYNC   0x2u
+
+/* Special Task Management Function CDB */
+#define TMF_LUN_RESET  0x1u
+#define TMF_CLEAR_ACA  0x2u
+
+#endif /* _SISLITE_H */

[v2] cxlflash: Base support for IBM CXL Flash Adapter

Commit Message

Comments

Patch