Message ID | 1587626596-1044-5-git-send-email-suganath-prabu.subramani@broadcom.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | mpt3sas: Fix changing coherent mask after allocation | expand |
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
On 4/23/20 9:23 AM, Suganath Prabu wrote: > For INVADER_SERIES each set of 8 reply queues (0 - 7, 8 - 15,..) and > VENTURA_SERIES each set of 16 reply queues (0 - 15, 16 - 31,..) should > be within 4 GB boundary. Driver uses limitation of VENTURA_SERIES to > manage INVADER_SERIES as well. So here driver is allocating the DMA able > memory for RDPQ's accordingly. > > 1) At driver load, set DMA Mask to 64 and allocate memory for RDPQ's. > 2) Check if allocated resources for RDPQ are in the same 4GB range. > 3) If #2 is true, continue with 64 bit DMA and go to #6 > 4) If #2 is false, then free all the resources from #1. > 5) Set DMA mask to 32 and allocate RDPQ's. > 6) Proceed with driver loading and other allocations > --- > v1 Change log: > 1) Use one dma pool for RDPQ's, thus removes the logic of using second > dma pool with align. > v2 Change log: > Added flag use_32bit_dma. If this flag is true, 32 bit coharent > dma is used. > > Signed-off-by: Suganath Prabu <suganath-prabu.subramani@broadcom.com> > Reviewed-by: Christoph Hellwig <hch@lst.de> > --- > drivers/scsi/mpt3sas/mpt3sas_base.c | 152 +++++++++++++++++++++++++----------- > drivers/scsi/mpt3sas/mpt3sas_base.h | 3 + > 2 files changed, 109 insertions(+), 46 deletions(-) > > diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c > index 0588941..af30e74 100644 > --- a/drivers/scsi/mpt3sas/mpt3sas_base.c > +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c > @@ -2810,7 +2810,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) > int dma_mask; > > if (ioc->is_mcpu_endpoint || > - sizeof(dma_addr_t) == 4 || > + sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || > dma_get_required_mask(&pdev->dev) <= 32) > dma_mask = 32; > /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ > @@ -4807,8 +4807,8 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) > { > int i = 0; > int j = 0; > + int dma_alloc_count = 0; > struct chain_tracker *ct; > - struct reply_post_struct *rps; > > dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); > > @@ -4850,29 +4850,34 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) > } > > if (ioc->reply_post) { > - do { > - rps = &ioc->reply_post[i]; > - if (rps->reply_post_free) { > - dma_pool_free( > - ioc->reply_post_free_dma_pool, > - rps->reply_post_free, > - rps->reply_post_free_dma); > - dexitprintk(ioc, > - ioc_info(ioc, "reply_post_free_pool(0x%p): free\n", > - rps->reply_post_free)); > - rps->reply_post_free = NULL; > + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, > + RDPQ_MAX_INDEX_IN_ONE_CHUNK); > + for (i = 0; i < ioc->reply_queue_count; i++) { > + if (i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0 > + && dma_alloc_count) { > + if (ioc->reply_post[i].reply_post_free) { > + dma_pool_free( > + ioc->reply_post_free_dma_pool, > + ioc->reply_post[i].reply_post_free, > + ioc->reply_post[i].reply_post_free_dma); > + dexitprintk(ioc, ioc_info(ioc, > + "reply_post_free_pool(0x%p): free\n", > + ioc->reply_post[i].reply_post_free)); > + ioc->reply_post[i].reply_post_free = > + NULL; > + } > + --dma_alloc_count; > } > - } while (ioc->rdpq_array_enable && > - (++i < ioc->reply_queue_count)); > + } > + dma_pool_destroy(ioc->reply_post_free_dma_pool); > if (ioc->reply_post_free_array && > ioc->rdpq_array_enable) { > dma_pool_free(ioc->reply_post_free_array_dma_pool, > - ioc->reply_post_free_array, > - ioc->reply_post_free_array_dma); > + ioc->reply_post_free_array, > + ioc->reply_post_free_array_dma); > ioc->reply_post_free_array = NULL; > } > dma_pool_destroy(ioc->reply_post_free_array_dma_pool); > - dma_pool_destroy(ioc->reply_post_free_dma_pool); > kfree(ioc->reply_post); > } > > @@ -4948,36 +4953,75 @@ mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) > static int > base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) > { > - int i; > + int i = 0; > + u32 dma_alloc_count = 0; > + int reply_post_free_sz = ioc->reply_post_queue_depth * > + sizeof(Mpi2DefaultReplyDescriptor_t); > int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1; > > ioc->reply_post = kcalloc(count, sizeof(struct reply_post_struct), > GFP_KERNEL); > if (!ioc->reply_post) > return -ENOMEM; > + /* > + * For INVADER_SERIES each set of 8 reply queues(0-7, 8-15, ..) and > + * VENTURA_SERIES each set of 16 reply queues(0-15, 16-31, ..) should > + * be within 4GB boundary i.e reply queues in a set must have same > + * upper 32-bits in their memory address. so here driver is allocating > + * the DMA'able memory for reply queues according. > + * Driver uses limitation of > + * VENTURA_SERIES to manage INVADER_SERIES as well. > + */ > + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, > + RDPQ_MAX_INDEX_IN_ONE_CHUNK); > ioc->reply_post_free_dma_pool = > - dma_pool_create("reply_post_free pool", > - &ioc->pdev->dev, sz, 16, 0); > + dma_pool_create("reply_post_free pool", > + &ioc->pdev->dev, sz, 16, 0); > if (!ioc->reply_post_free_dma_pool) > return -ENOMEM; > - i = 0; > - do { > - ioc->reply_post[i].reply_post_free = > - dma_pool_zalloc(ioc->reply_post_free_dma_pool, > - GFP_KERNEL, > - &ioc->reply_post[i].reply_post_free_dma); > - if (!ioc->reply_post[i].reply_post_free) > - return -ENOMEM; > - dinitprintk(ioc, > - ioc_info(ioc, "reply post free pool (0x%p): depth(%d)," > - "element_size(%d), pool_size(%d kB)\n", > - ioc->reply_post[i].reply_post_free, > - ioc->reply_post_queue_depth, 8, sz / 1024)); > - dinitprintk(ioc, > - ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n", > - (u64)ioc->reply_post[i].reply_post_free_dma)); > + for (i = 0; i < ioc->reply_queue_count; i++) { > + if ((i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0) && dma_alloc_count) { > + ioc->reply_post[i].reply_post_free = ioc->reply_post is allocated with 'count' size, which may be smaller than ioc->reply_queue_count Probably this causes some older systems to not boot. The problem should be fixed before it comes to mainline. Cheers, tomash > + dma_pool_alloc(ioc->reply_post_free_dma_pool, > + GFP_KERNEL, > + &ioc->reply_post[i].reply_post_free_dma); > + if (!ioc->reply_post[i].reply_post_free) > + return -ENOMEM; > + /* > + * Each set of RDPQ pool must satisfy 4gb boundary > + * restriction. > + * 1) Check if allocated resources for RDPQ pool are in > + * the same 4GB range. > + * 2) If #1 is true, continue with 64 bit DMA. > + * 3) If #1 is false, return 1. which means free all the > + * resources and set DMA mask to 32 and allocate. > + */ > + if (!mpt3sas_check_same_4gb_region( > + (long)ioc->reply_post[i].reply_post_free, sz)) { > + dinitprintk(ioc, > + ioc_err(ioc, "bad Replypost free pool(0x%p)" > + "reply_post_free_dma = (0x%llx)\n", > + ioc->reply_post[i].reply_post_free, > + (unsigned long long) > + ioc->reply_post[i].reply_post_free_dma)); > + return -EAGAIN; > + } > + memset(ioc->reply_post[i].reply_post_free, 0, > + RDPQ_MAX_INDEX_IN_ONE_CHUNK * > + reply_post_free_sz); > + dma_alloc_count--; > > - } while (ioc->rdpq_array_enable && ++i < ioc->reply_queue_count); > + } else { > + ioc->reply_post[i].reply_post_free = > + (Mpi2ReplyDescriptorsUnion_t *) > + ((long)ioc->reply_post[i-1].reply_post_free > + + reply_post_free_sz); > + ioc->reply_post[i].reply_post_free_dma = > + (dma_addr_t) > + (ioc->reply_post[i-1].reply_post_free_dma + > + reply_post_free_sz); > + } > + } > return 0; > } > > @@ -4995,10 +5039,12 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > u16 chains_needed_per_io; > u32 sz, total_sz, reply_post_free_sz, reply_post_free_array_sz; > u32 retry_sz; > + u32 rdpq_sz = 0; > u16 max_request_credit, nvme_blocks_needed; > unsigned short sg_tablesize; > u16 sge_size; > int i, j; > + int ret = 0; > struct chain_tracker *ct; > > dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); > @@ -5152,14 +5198,28 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > /* reply post queue, 16 byte align */ > reply_post_free_sz = ioc->reply_post_queue_depth * > sizeof(Mpi2DefaultReplyDescriptor_t); > - > - sz = reply_post_free_sz; > + rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK; > if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) > - sz *= ioc->reply_queue_count; > - if (base_alloc_rdpq_dma_pool(ioc, sz)) > - goto out; > - total_sz += sz * (!ioc->rdpq_array_enable ? 1 : ioc->reply_queue_count); > - > + rdpq_sz = reply_post_free_sz * ioc->reply_queue_count; > + ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz); > + if (ret == -EAGAIN) { > + /* > + * Free allocated bad RDPQ memory pools. > + * Change dma coherent mask to 32 bit and reallocate RDPQ > + */ > + _base_release_memory_pools(ioc); > + ioc->use_32bit_dma = true; > + if (_base_config_dma_addressing(ioc, ioc->pdev) != 0) { > + ioc_err(ioc, > + "32 DMA mask failed %s\n", pci_name(ioc->pdev)); > + return -ENODEV; > + } > + if (base_alloc_rdpq_dma_pool(ioc, rdpq_sz)) > + return -ENOMEM; > + } else if (ret == -ENOMEM) > + return -ENOMEM; > + total_sz = rdpq_sz * (!ioc->rdpq_array_enable ? 1 : > + DIV_ROUND_UP(ioc->reply_queue_count, RDPQ_MAX_INDEX_IN_ONE_CHUNK)); > ioc->scsiio_depth = ioc->hba_queue_depth - > ioc->hi_priority_depth - ioc->internal_depth; > > @@ -5171,7 +5231,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > ioc_info(ioc, "scsi host: can_queue depth (%d)\n", > ioc->shost->can_queue)); > > - > /* contiguous pool for request and chains, 16 byte align, one extra " > * "frame for smid=0 > */ > @@ -7141,6 +7200,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) > ioc->smp_affinity_enable = smp_affinity_enable; > > ioc->rdpq_array_enable_assigned = 0; > + ioc->use_32bit_dma = 0; > if (ioc->is_aero_ioc) > ioc->base_readl = &_base_readl_aero; > else > diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h > index caae040..5a83971 100644 > --- a/drivers/scsi/mpt3sas/mpt3sas_base.h > +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h > @@ -367,6 +367,7 @@ struct mpt3sas_nvme_cmd { > #define MPT3SAS_HIGH_IOPS_REPLY_QUEUES 8 > #define MPT3SAS_HIGH_IOPS_BATCH_COUNT 16 > #define MPT3SAS_GEN35_MAX_MSIX_QUEUES 128 > +#define RDPQ_MAX_INDEX_IN_ONE_CHUNK 16 > > /* OEM Specific Flags will come from OEM specific header files */ > struct Mpi2ManufacturingPage10_t { > @@ -1063,6 +1064,7 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); > * @thresh_hold: Max number of reply descriptors processed > * before updating Host Index > * @drv_support_bitmap: driver's supported feature bit map > + * @use_32bit_dma: Flag to use 32 bit consistent dma mask > * @scsi_io_cb_idx: shost generated commands > * @tm_cb_idx: task management commands > * @scsih_cb_idx: scsih internal commands > @@ -1252,6 +1254,7 @@ struct MPT3SAS_ADAPTER { > u8 high_iops_queues; > u32 drv_support_bitmap; > bool enable_sdev_max_qd; > + bool use_32bit_dma; > > /* internal commands, callback index */ > u8 scsi_io_cb_idx; >
On Thu, May 21, 2020 at 8:42 PM Tomas Henzl <thenzl@redhat.com> wrote: > > On 4/23/20 9:23 AM, Suganath Prabu wrote: > > For INVADER_SERIES each set of 8 reply queues (0 - 7, 8 - 15,..) and > > VENTURA_SERIES each set of 16 reply queues (0 - 15, 16 - 31,..) should > > be within 4 GB boundary. Driver uses limitation of VENTURA_SERIES to > > manage INVADER_SERIES as well. So here driver is allocating the DMA able > > memory for RDPQ's accordingly. > > > > 1) At driver load, set DMA Mask to 64 and allocate memory for RDPQ's. > > 2) Check if allocated resources for RDPQ are in the same 4GB range. > > 3) If #2 is true, continue with 64 bit DMA and go to #6 > > 4) If #2 is false, then free all the resources from #1. > > 5) Set DMA mask to 32 and allocate RDPQ's. > > 6) Proceed with driver loading and other allocations > > --- > > v1 Change log: > > 1) Use one dma pool for RDPQ's, thus removes the logic of using second > > dma pool with align. > > v2 Change log: > > Added flag use_32bit_dma. If this flag is true, 32 bit coharent > > dma is used. > > > > Signed-off-by: Suganath Prabu <suganath-prabu.subramani@broadcom.com> > > Reviewed-by: Christoph Hellwig <hch@lst.de> > > --- > > drivers/scsi/mpt3sas/mpt3sas_base.c | 152 +++++++++++++++++++++++++----------- > > drivers/scsi/mpt3sas/mpt3sas_base.h | 3 + > > 2 files changed, 109 insertions(+), 46 deletions(-) > > > > diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c > > index 0588941..af30e74 100644 > > --- a/drivers/scsi/mpt3sas/mpt3sas_base.c > > +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c > > @@ -2810,7 +2810,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) > > int dma_mask; > > > > if (ioc->is_mcpu_endpoint || > > - sizeof(dma_addr_t) == 4 || > > + sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || > > dma_get_required_mask(&pdev->dev) <= 32) > > dma_mask = 32; > > /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ > > @@ -4807,8 +4807,8 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > { > > int i = 0; > > int j = 0; > > + int dma_alloc_count = 0; > > struct chain_tracker *ct; > > - struct reply_post_struct *rps; > > > > dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); > > > > @@ -4850,29 +4850,34 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > } > > > > if (ioc->reply_post) { > > - do { > > - rps = &ioc->reply_post[i]; > > - if (rps->reply_post_free) { > > - dma_pool_free( > > - ioc->reply_post_free_dma_pool, > > - rps->reply_post_free, > > - rps->reply_post_free_dma); > > - dexitprintk(ioc, > > - ioc_info(ioc, "reply_post_free_pool(0x%p): free\n", > > - rps->reply_post_free)); > > - rps->reply_post_free = NULL; > > + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, > > + RDPQ_MAX_INDEX_IN_ONE_CHUNK); > > + for (i = 0; i < ioc->reply_queue_count; i++) { > > + if (i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0 > > + && dma_alloc_count) { > > + if (ioc->reply_post[i].reply_post_free) { > > + dma_pool_free( > > + ioc->reply_post_free_dma_pool, > > + ioc->reply_post[i].reply_post_free, > > + ioc->reply_post[i].reply_post_free_dma); > > + dexitprintk(ioc, ioc_info(ioc, > > + "reply_post_free_pool(0x%p): free\n", > > + ioc->reply_post[i].reply_post_free)); > > + ioc->reply_post[i].reply_post_free = > > + NULL; > > + } > > + --dma_alloc_count; > > } > > - } while (ioc->rdpq_array_enable && > > - (++i < ioc->reply_queue_count)); > > + } > > + dma_pool_destroy(ioc->reply_post_free_dma_pool); > > if (ioc->reply_post_free_array && > > ioc->rdpq_array_enable) { > > dma_pool_free(ioc->reply_post_free_array_dma_pool, > > - ioc->reply_post_free_array, > > - ioc->reply_post_free_array_dma); > > + ioc->reply_post_free_array, > > + ioc->reply_post_free_array_dma); > > ioc->reply_post_free_array = NULL; > > } > > dma_pool_destroy(ioc->reply_post_free_array_dma_pool); > > - dma_pool_destroy(ioc->reply_post_free_dma_pool); > > kfree(ioc->reply_post); > > } > > > > @@ -4948,36 +4953,75 @@ mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) > > static int > > base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) > > { > > - int i; > > + int i = 0; > > + u32 dma_alloc_count = 0; > > + int reply_post_free_sz = ioc->reply_post_queue_depth * > > + sizeof(Mpi2DefaultReplyDescriptor_t); > > int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1; > > > > ioc->reply_post = kcalloc(count, sizeof(struct reply_post_struct), > > GFP_KERNEL); > > if (!ioc->reply_post) > > return -ENOMEM; > > + /* > > + * For INVADER_SERIES each set of 8 reply queues(0-7, 8-15, ..) and > > + * VENTURA_SERIES each set of 16 reply queues(0-15, 16-31, ..) should > > + * be within 4GB boundary i.e reply queues in a set must have same > > + * upper 32-bits in their memory address. so here driver is allocating > > + * the DMA'able memory for reply queues according. > > + * Driver uses limitation of > > + * VENTURA_SERIES to manage INVADER_SERIES as well. > > + */ > > + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, > > + RDPQ_MAX_INDEX_IN_ONE_CHUNK); > > ioc->reply_post_free_dma_pool = > > - dma_pool_create("reply_post_free pool", > > - &ioc->pdev->dev, sz, 16, 0); > > + dma_pool_create("reply_post_free pool", > > + &ioc->pdev->dev, sz, 16, 0); > > if (!ioc->reply_post_free_dma_pool) > > return -ENOMEM; > > - i = 0; > > - do { > > - ioc->reply_post[i].reply_post_free = > > - dma_pool_zalloc(ioc->reply_post_free_dma_pool, > > - GFP_KERNEL, > > - &ioc->reply_post[i].reply_post_free_dma); > > - if (!ioc->reply_post[i].reply_post_free) > > - return -ENOMEM; > > - dinitprintk(ioc, > > - ioc_info(ioc, "reply post free pool (0x%p): depth(%d)," > > - "element_size(%d), pool_size(%d kB)\n", > > - ioc->reply_post[i].reply_post_free, > > - ioc->reply_post_queue_depth, 8, sz / 1024)); > > - dinitprintk(ioc, > > - ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n", > > - (u64)ioc->reply_post[i].reply_post_free_dma)); > > + for (i = 0; i < ioc->reply_queue_count; i++) { > > + if ((i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0) && dma_alloc_count) { > > + ioc->reply_post[i].reply_post_free = > ioc->reply_post is allocated with 'count' size, which may be smaller > than ioc->reply_queue_count > Probably this causes some older systems to not boot. > The problem should be fixed before it comes to mainline. > > Cheers, > tomash Tomash, thanks for identifying this bug. We will post the fix patch today. Thanks, Sreekanth > > > > + dma_pool_alloc(ioc->reply_post_free_dma_pool, > > + GFP_KERNEL, > > + &ioc->reply_post[i].reply_post_free_dma); > > + if (!ioc->reply_post[i].reply_post_free) > > + return -ENOMEM; > > + /* > > + * Each set of RDPQ pool must satisfy 4gb boundary > > + * restriction. > > + * 1) Check if allocated resources for RDPQ pool are in > > + * the same 4GB range. > > + * 2) If #1 is true, continue with 64 bit DMA. > > + * 3) If #1 is false, return 1. which means free all the > > + * resources and set DMA mask to 32 and allocate. > > + */ > > + if (!mpt3sas_check_same_4gb_region( > > + (long)ioc->reply_post[i].reply_post_free, sz)) { > > + dinitprintk(ioc, > > + ioc_err(ioc, "bad Replypost free pool(0x%p)" > > + "reply_post_free_dma = (0x%llx)\n", > > + ioc->reply_post[i].reply_post_free, > > + (unsigned long long) > > + ioc->reply_post[i].reply_post_free_dma)); > > + return -EAGAIN; > > + } > > + memset(ioc->reply_post[i].reply_post_free, 0, > > + RDPQ_MAX_INDEX_IN_ONE_CHUNK * > > + reply_post_free_sz); > > + dma_alloc_count--; > > > > - } while (ioc->rdpq_array_enable && ++i < ioc->reply_queue_count); > > + } else { > > + ioc->reply_post[i].reply_post_free = > > + (Mpi2ReplyDescriptorsUnion_t *) > > + ((long)ioc->reply_post[i-1].reply_post_free > > + + reply_post_free_sz); > > + ioc->reply_post[i].reply_post_free_dma = > > + (dma_addr_t) > > + (ioc->reply_post[i-1].reply_post_free_dma + > > + reply_post_free_sz); > > + } > > + } > > return 0; > > } > > > > @@ -4995,10 +5039,12 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > u16 chains_needed_per_io; > > u32 sz, total_sz, reply_post_free_sz, reply_post_free_array_sz; > > u32 retry_sz; > > + u32 rdpq_sz = 0; > > u16 max_request_credit, nvme_blocks_needed; > > unsigned short sg_tablesize; > > u16 sge_size; > > int i, j; > > + int ret = 0; > > struct chain_tracker *ct; > > > > dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); > > @@ -5152,14 +5198,28 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > /* reply post queue, 16 byte align */ > > reply_post_free_sz = ioc->reply_post_queue_depth * > > sizeof(Mpi2DefaultReplyDescriptor_t); > > - > > - sz = reply_post_free_sz; > > + rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK; > > if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) > > - sz *= ioc->reply_queue_count; > > - if (base_alloc_rdpq_dma_pool(ioc, sz)) > > - goto out; > > - total_sz += sz * (!ioc->rdpq_array_enable ? 1 : ioc->reply_queue_count); > > - > > + rdpq_sz = reply_post_free_sz * ioc->reply_queue_count; > > + ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz); > > + if (ret == -EAGAIN) { > > + /* > > + * Free allocated bad RDPQ memory pools. > > + * Change dma coherent mask to 32 bit and reallocate RDPQ > > + */ > > + _base_release_memory_pools(ioc); > > + ioc->use_32bit_dma = true; > > + if (_base_config_dma_addressing(ioc, ioc->pdev) != 0) { > > + ioc_err(ioc, > > + "32 DMA mask failed %s\n", pci_name(ioc->pdev)); > > + return -ENODEV; > > + } > > + if (base_alloc_rdpq_dma_pool(ioc, rdpq_sz)) > > + return -ENOMEM; > > + } else if (ret == -ENOMEM) > > + return -ENOMEM; > > + total_sz = rdpq_sz * (!ioc->rdpq_array_enable ? 1 : > > + DIV_ROUND_UP(ioc->reply_queue_count, RDPQ_MAX_INDEX_IN_ONE_CHUNK)); > > ioc->scsiio_depth = ioc->hba_queue_depth - > > ioc->hi_priority_depth - ioc->internal_depth; > > > > @@ -5171,7 +5231,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > ioc_info(ioc, "scsi host: can_queue depth (%d)\n", > > ioc->shost->can_queue)); > > > > - > > /* contiguous pool for request and chains, 16 byte align, one extra " > > * "frame for smid=0 > > */ > > @@ -7141,6 +7200,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) > > ioc->smp_affinity_enable = smp_affinity_enable; > > > > ioc->rdpq_array_enable_assigned = 0; > > + ioc->use_32bit_dma = 0; > > if (ioc->is_aero_ioc) > > ioc->base_readl = &_base_readl_aero; > > else > > diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h > > index caae040..5a83971 100644 > > --- a/drivers/scsi/mpt3sas/mpt3sas_base.h > > +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h > > @@ -367,6 +367,7 @@ struct mpt3sas_nvme_cmd { > > #define MPT3SAS_HIGH_IOPS_REPLY_QUEUES 8 > > #define MPT3SAS_HIGH_IOPS_BATCH_COUNT 16 > > #define MPT3SAS_GEN35_MAX_MSIX_QUEUES 128 > > +#define RDPQ_MAX_INDEX_IN_ONE_CHUNK 16 > > > > /* OEM Specific Flags will come from OEM specific header files */ > > struct Mpi2ManufacturingPage10_t { > > @@ -1063,6 +1064,7 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); > > * @thresh_hold: Max number of reply descriptors processed > > * before updating Host Index > > * @drv_support_bitmap: driver's supported feature bit map > > + * @use_32bit_dma: Flag to use 32 bit consistent dma mask > > * @scsi_io_cb_idx: shost generated commands > > * @tm_cb_idx: task management commands > > * @scsih_cb_idx: scsih internal commands > > @@ -1252,6 +1254,7 @@ struct MPT3SAS_ADAPTER { > > u8 high_iops_queues; > > u32 drv_support_bitmap; > > bool enable_sdev_max_qd; > > + bool use_32bit_dma; > > > > /* internal commands, callback index */ > > u8 scsi_io_cb_idx; > > >
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0588941..af30e74 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2810,7 +2810,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) int dma_mask; if (ioc->is_mcpu_endpoint || - sizeof(dma_addr_t) == 4 || + sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || dma_get_required_mask(&pdev->dev) <= 32) dma_mask = 32; /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ @@ -4807,8 +4807,8 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) { int i = 0; int j = 0; + int dma_alloc_count = 0; struct chain_tracker *ct; - struct reply_post_struct *rps; dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); @@ -4850,29 +4850,34 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) } if (ioc->reply_post) { - do { - rps = &ioc->reply_post[i]; - if (rps->reply_post_free) { - dma_pool_free( - ioc->reply_post_free_dma_pool, - rps->reply_post_free, - rps->reply_post_free_dma); - dexitprintk(ioc, - ioc_info(ioc, "reply_post_free_pool(0x%p): free\n", - rps->reply_post_free)); - rps->reply_post_free = NULL; + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, + RDPQ_MAX_INDEX_IN_ONE_CHUNK); + for (i = 0; i < ioc->reply_queue_count; i++) { + if (i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0 + && dma_alloc_count) { + if (ioc->reply_post[i].reply_post_free) { + dma_pool_free( + ioc->reply_post_free_dma_pool, + ioc->reply_post[i].reply_post_free, + ioc->reply_post[i].reply_post_free_dma); + dexitprintk(ioc, ioc_info(ioc, + "reply_post_free_pool(0x%p): free\n", + ioc->reply_post[i].reply_post_free)); + ioc->reply_post[i].reply_post_free = + NULL; + } + --dma_alloc_count; } - } while (ioc->rdpq_array_enable && - (++i < ioc->reply_queue_count)); + } + dma_pool_destroy(ioc->reply_post_free_dma_pool); if (ioc->reply_post_free_array && ioc->rdpq_array_enable) { dma_pool_free(ioc->reply_post_free_array_dma_pool, - ioc->reply_post_free_array, - ioc->reply_post_free_array_dma); + ioc->reply_post_free_array, + ioc->reply_post_free_array_dma); ioc->reply_post_free_array = NULL; } dma_pool_destroy(ioc->reply_post_free_array_dma_pool); - dma_pool_destroy(ioc->reply_post_free_dma_pool); kfree(ioc->reply_post); } @@ -4948,36 +4953,75 @@ mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) static int base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) { - int i; + int i = 0; + u32 dma_alloc_count = 0; + int reply_post_free_sz = ioc->reply_post_queue_depth * + sizeof(Mpi2DefaultReplyDescriptor_t); int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1; ioc->reply_post = kcalloc(count, sizeof(struct reply_post_struct), GFP_KERNEL); if (!ioc->reply_post) return -ENOMEM; + /* + * For INVADER_SERIES each set of 8 reply queues(0-7, 8-15, ..) and + * VENTURA_SERIES each set of 16 reply queues(0-15, 16-31, ..) should + * be within 4GB boundary i.e reply queues in a set must have same + * upper 32-bits in their memory address. so here driver is allocating + * the DMA'able memory for reply queues according. + * Driver uses limitation of + * VENTURA_SERIES to manage INVADER_SERIES as well. + */ + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, + RDPQ_MAX_INDEX_IN_ONE_CHUNK); ioc->reply_post_free_dma_pool = - dma_pool_create("reply_post_free pool", - &ioc->pdev->dev, sz, 16, 0); + dma_pool_create("reply_post_free pool", + &ioc->pdev->dev, sz, 16, 0); if (!ioc->reply_post_free_dma_pool) return -ENOMEM; - i = 0; - do { - ioc->reply_post[i].reply_post_free = - dma_pool_zalloc(ioc->reply_post_free_dma_pool, - GFP_KERNEL, - &ioc->reply_post[i].reply_post_free_dma); - if (!ioc->reply_post[i].reply_post_free) - return -ENOMEM; - dinitprintk(ioc, - ioc_info(ioc, "reply post free pool (0x%p): depth(%d)," - "element_size(%d), pool_size(%d kB)\n", - ioc->reply_post[i].reply_post_free, - ioc->reply_post_queue_depth, 8, sz / 1024)); - dinitprintk(ioc, - ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n", - (u64)ioc->reply_post[i].reply_post_free_dma)); + for (i = 0; i < ioc->reply_queue_count; i++) { + if ((i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0) && dma_alloc_count) { + ioc->reply_post[i].reply_post_free = + dma_pool_alloc(ioc->reply_post_free_dma_pool, + GFP_KERNEL, + &ioc->reply_post[i].reply_post_free_dma); + if (!ioc->reply_post[i].reply_post_free) + return -ENOMEM; + /* + * Each set of RDPQ pool must satisfy 4gb boundary + * restriction. + * 1) Check if allocated resources for RDPQ pool are in + * the same 4GB range. + * 2) If #1 is true, continue with 64 bit DMA. + * 3) If #1 is false, return 1. which means free all the + * resources and set DMA mask to 32 and allocate. + */ + if (!mpt3sas_check_same_4gb_region( + (long)ioc->reply_post[i].reply_post_free, sz)) { + dinitprintk(ioc, + ioc_err(ioc, "bad Replypost free pool(0x%p)" + "reply_post_free_dma = (0x%llx)\n", + ioc->reply_post[i].reply_post_free, + (unsigned long long) + ioc->reply_post[i].reply_post_free_dma)); + return -EAGAIN; + } + memset(ioc->reply_post[i].reply_post_free, 0, + RDPQ_MAX_INDEX_IN_ONE_CHUNK * + reply_post_free_sz); + dma_alloc_count--; - } while (ioc->rdpq_array_enable && ++i < ioc->reply_queue_count); + } else { + ioc->reply_post[i].reply_post_free = + (Mpi2ReplyDescriptorsUnion_t *) + ((long)ioc->reply_post[i-1].reply_post_free + + reply_post_free_sz); + ioc->reply_post[i].reply_post_free_dma = + (dma_addr_t) + (ioc->reply_post[i-1].reply_post_free_dma + + reply_post_free_sz); + } + } return 0; } @@ -4995,10 +5039,12 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) u16 chains_needed_per_io; u32 sz, total_sz, reply_post_free_sz, reply_post_free_array_sz; u32 retry_sz; + u32 rdpq_sz = 0; u16 max_request_credit, nvme_blocks_needed; unsigned short sg_tablesize; u16 sge_size; int i, j; + int ret = 0; struct chain_tracker *ct; dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); @@ -5152,14 +5198,28 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) /* reply post queue, 16 byte align */ reply_post_free_sz = ioc->reply_post_queue_depth * sizeof(Mpi2DefaultReplyDescriptor_t); - - sz = reply_post_free_sz; + rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK; if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) - sz *= ioc->reply_queue_count; - if (base_alloc_rdpq_dma_pool(ioc, sz)) - goto out; - total_sz += sz * (!ioc->rdpq_array_enable ? 1 : ioc->reply_queue_count); - + rdpq_sz = reply_post_free_sz * ioc->reply_queue_count; + ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz); + if (ret == -EAGAIN) { + /* + * Free allocated bad RDPQ memory pools. + * Change dma coherent mask to 32 bit and reallocate RDPQ + */ + _base_release_memory_pools(ioc); + ioc->use_32bit_dma = true; + if (_base_config_dma_addressing(ioc, ioc->pdev) != 0) { + ioc_err(ioc, + "32 DMA mask failed %s\n", pci_name(ioc->pdev)); + return -ENODEV; + } + if (base_alloc_rdpq_dma_pool(ioc, rdpq_sz)) + return -ENOMEM; + } else if (ret == -ENOMEM) + return -ENOMEM; + total_sz = rdpq_sz * (!ioc->rdpq_array_enable ? 1 : + DIV_ROUND_UP(ioc->reply_queue_count, RDPQ_MAX_INDEX_IN_ONE_CHUNK)); ioc->scsiio_depth = ioc->hba_queue_depth - ioc->hi_priority_depth - ioc->internal_depth; @@ -5171,7 +5231,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) ioc_info(ioc, "scsi host: can_queue depth (%d)\n", ioc->shost->can_queue)); - /* contiguous pool for request and chains, 16 byte align, one extra " * "frame for smid=0 */ @@ -7141,6 +7200,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->smp_affinity_enable = smp_affinity_enable; ioc->rdpq_array_enable_assigned = 0; + ioc->use_32bit_dma = 0; if (ioc->is_aero_ioc) ioc->base_readl = &_base_readl_aero; else diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index caae040..5a83971 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -367,6 +367,7 @@ struct mpt3sas_nvme_cmd { #define MPT3SAS_HIGH_IOPS_REPLY_QUEUES 8 #define MPT3SAS_HIGH_IOPS_BATCH_COUNT 16 #define MPT3SAS_GEN35_MAX_MSIX_QUEUES 128 +#define RDPQ_MAX_INDEX_IN_ONE_CHUNK 16 /* OEM Specific Flags will come from OEM specific header files */ struct Mpi2ManufacturingPage10_t { @@ -1063,6 +1064,7 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); * @thresh_hold: Max number of reply descriptors processed * before updating Host Index * @drv_support_bitmap: driver's supported feature bit map + * @use_32bit_dma: Flag to use 32 bit consistent dma mask * @scsi_io_cb_idx: shost generated commands * @tm_cb_idx: task management commands * @scsih_cb_idx: scsih internal commands @@ -1252,6 +1254,7 @@ struct MPT3SAS_ADAPTER { u8 high_iops_queues; u32 drv_support_bitmap; bool enable_sdev_max_qd; + bool use_32bit_dma; /* internal commands, callback index */ u8 scsi_io_cb_idx;