diff mbox series

HID: intel-ish-hid: Use dma_alloc_coherent for firmware update

Message ID 20220209050947.2119465-1-gwendal@chromium.org (mailing list archive)
State Mainlined
Commit f97ec5d75e9261a5da78dc28a8955b7cc0c4468b
Delegated to: Jiri Kosina
Headers show
Series HID: intel-ish-hid: Use dma_alloc_coherent for firmware update | expand

Commit Message

Gwendal Grignou Feb. 9, 2022, 5:09 a.m. UTC
Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
allocator will ignore the attribute.

Instead, use dma_alloc_coherent() API as we allocate a small amount of
memory to transfer firmware fragment to the ISH.

On Arcada chromebook, after the patch the warning:
"Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0 (GFP_KERNEL).  Fix your code!"
is gone. The ISH firmware is loaded properly and we can interact with
the ISH:
> ectool  --name cros_ish version
...
Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47 @localhost
Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost

Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader client driver")
Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Cc: stable@vger.kernel.org
---
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

Comments

srinivas pandruvada Feb. 9, 2022, 6:52 p.m. UTC | #1
On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> allocator will ignore the attribute.
> 
Looks like there is new error flow added here for this flag.
Is this just removing GFP_DMA32 not enough?

> Instead, use dma_alloc_coherent() API as we allocate a small amount
> of
> memory to transfer firmware fragment to the ISH.
> 
> On Arcada chromebook, after the patch the warning:
> "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> (GFP_KERNEL).  Fix your code!"
> is gone. The ISH firmware is loaded properly and we can interact with
> the ISH:
> > ectool  --name cros_ish version
> ...
> Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47
> @localhost
> Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost
> 
> Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader
> client driver")
> Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> Cc: stable@vger.kernel.org
> ---
>  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++----------------
> --
>  1 file changed, 3 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> index e24988586710..16aa030af845 100644
> --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>          */
>         payload_max_size &= ~(L1_CACHE_BYTES - 1);
>  
> -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32);
> +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> &dma_buf_phy, GFP_KERNEL);
>         if (!dma_buf) {
>                 client_data->flag_retry = true;
>                 return -ENOMEM;
>         }
>  
> -       dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size,
> -                                    DMA_TO_DEVICE);
> -       if (dma_mapping_error(devc, dma_buf_phy)) {
> -               dev_err(cl_data_to_dev(client_data), "DMA map
> failed\n");
> -               client_data->flag_retry = true;
> -               rv = -ENOMEM;
> -               goto end_err_dma_buf_release;
> -       }
> -
>         ldr_xfer_dma_frag.fragment.hdr.command =
> LOADER_CMD_XFER_FRAGMENT;
>         ldr_xfer_dma_frag.fragment.xfer_mode =
> LOADER_XFER_MODE_DIRECT_DMA;
>         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>                 ldr_xfer_dma_frag.fragment.size = fragment_size;
>                 memcpy(dma_buf, &fw->data[fragment_offset],
> fragment_size);
>  
> -               dma_sync_single_for_device(devc, dma_buf_phy,
> -                                          payload_max_size,
> -                                          DMA_TO_DEVICE);
> -
Any reason for removal of sync?

Thanks,
Srinivas

> -               /*
> -                * Flush cache here because the
> dma_sync_single_for_device()
> -                * does not do for x86.
> -                */
> +               /* Flush cache to be sure the data is in main memory.
> */
>                 clflush_cache_range(dma_buf, payload_max_size);
>  
>                 dev_dbg(cl_data_to_dev(client_data),
> @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>                 fragment_offset += fragment_size;
>         }
>  
> -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> DMA_TO_DEVICE);
> -       kfree(dma_buf);
> -       return 0;
> -
>  end_err_resp_buf_release:
> -       /* Free ISH buffer if not done already, in error case */
> -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> DMA_TO_DEVICE);
> -end_err_dma_buf_release:
> -       kfree(dma_buf);
> +       dma_free_coherent(devc, payload_max_size, dma_buf,
> dma_buf_phy);
>         return rv;
>  }
>
Gwendal Grignou Feb. 10, 2022, 12:59 a.m. UTC | #2
On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> > allocator will ignore the attribute.
> >
> Looks like there is new error flow added here for this flag.
> Is this just removing GFP_DMA32 not enough?
It was already ignored. It is not enough and I don't know why since
the virtual and physical addresses are in the same range:

With using kmalloc/dma_single_sync:
5.10 (firmware not loading)
[    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
kmalloc/dma_map_single: v:0xffff91531ae88000, phy:0x0000000085afe000
[    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
ddr_phys_addr=0x0000000085afe000
...

4.19 (firmware loading)
[    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
kmalloc/dma_map_single: v:0xffff88c145480000, phy:0x0000000085480000
[    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
ddr_phys_addr=0x0000000085480000
...

I also considered flushing the CPU cache before the
dma_sync_single_for_device call, and calling dma_sync_single_for_cpu()
after loader_cl_send() to be allowed to write into the buffer again.
But these did not help either.

But using dma_alloc_coherent() clearly works and its simpler API makes
it more appropriate for the task at hand.

For reference, the same log when using dma_alloc_coherent().
[    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
[    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
ddr_phys_addr=0x0000000001048000
...

>
> > Instead, use dma_alloc_coherent() API as we allocate a small amount
> > of
> > memory to transfer firmware fragment to the ISH.
> >
> > On Arcada chromebook, after the patch the warning:
> > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > (GFP_KERNEL).  Fix your code!"
> > is gone. The ISH firmware is loaded properly and we can interact with
> > the ISH:
> > > ectool  --name cros_ish version
> > ...
> > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47
> > @localhost
> > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost
> >
> > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader
> > client driver")
> > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > Cc: stable@vger.kernel.org
> > ---
> >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++----------------
> > --
> >  1 file changed, 3 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > index e24988586710..16aa030af845 100644
> > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct
> > ishtp_cl_data *client_data,
> >          */
> >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> >
> > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32);
> > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > &dma_buf_phy, GFP_KERNEL);
> >         if (!dma_buf) {
> >                 client_data->flag_retry = true;
> >                 return -ENOMEM;
> >         }
> >
> > -       dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size,
> > -                                    DMA_TO_DEVICE);
> > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > failed\n");
> > -               client_data->flag_retry = true;
> > -               rv = -ENOMEM;
> > -               goto end_err_dma_buf_release;
> > -       }
> > -
> >         ldr_xfer_dma_frag.fragment.hdr.command =
> > LOADER_CMD_XFER_FRAGMENT;
> >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > LOADER_XFER_MODE_DIRECT_DMA;
> >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > ishtp_cl_data *client_data,
> >                 ldr_xfer_dma_frag.fragment.size = fragment_size;
> >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > fragment_size);
> >
> > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > -                                          payload_max_size,
> > -                                          DMA_TO_DEVICE);
> > -
> Any reason for removal of sync?
It is not needed since we are using dma_alloc_coherent(). From [1]:
"""
void *
dma_alloc_coherent(struct device *dev, size_t size,
   dma_addr_t *dma_handle, gfp_t flag)

Consistent memory is memory for which a write by either the device or
the processor can immediately be read by the processor or device
without having to worry about caching effects.  (You may however need
to make sure to flush the processor's write buffers before telling
devices to read that memory.)

This routine allocates a region of <size> bytes of consistent memory.
"""'
>
> Thanks,
> Srinivas
>
> > -               /*
> > -                * Flush cache here because the
> > dma_sync_single_for_device()
> > -                * does not do for x86.
> > -                */
> > +               /* Flush cache to be sure the data is in main memory.
> > */
> >                 clflush_cache_range(dma_buf, payload_max_size);
> >
> >                 dev_dbg(cl_data_to_dev(client_data),
> > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > ishtp_cl_data *client_data,
> >                 fragment_offset += fragment_size;
> >         }
> >
> > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > DMA_TO_DEVICE);
> > -       kfree(dma_buf);
> > -       return 0;
> > -
> >  end_err_resp_buf_release:
> > -       /* Free ISH buffer if not done already, in error case */
> > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > DMA_TO_DEVICE);
> > -end_err_dma_buf_release:
> > -       kfree(dma_buf);
> > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > dma_buf_phy);
> >         return rv;
> >  }
> >
>

[1]https://www.kernel.org/doc/Documentation/DMA-API.txt
srinivas pandruvada Feb. 10, 2022, 2:51 a.m. UTC | #3
On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > 
> > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> > > allocator will ignore the attribute.
> > > 
> > Looks like there is new error flow added here for this flag.
> > Is this just removing GFP_DMA32 not enough?
> It was already ignored. It is not enough and I don't know why since
> the virtual and physical addresses are in the same range:
> 
> With using kmalloc/dma_single_sync:
> 5.10 (firmware not loading)
> [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> kmalloc/dma_map_single: v:0xffff91531ae88000, phy:0x0000000085afe000
> [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> ddr_phys_addr=0x0000000085afe000
> ...
> 
> 4.19 (firmware loading)
> [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> kmalloc/dma_map_single: v:0xffff88c145480000, phy:0x0000000085480000
> [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> ddr_phys_addr=0x0000000085480000
> ...
> 
> I also considered flushing the CPU cache before the
> dma_sync_single_for_device call, and calling
> dma_sync_single_for_cpu()
> after loader_cl_send() to be allowed to write into the buffer again.
> But these did not help either.
> 
> But using dma_alloc_coherent() clearly works and its simpler API
> makes
That is OK.

> it more appropriate for the task at hand.
> 
> For reference, the same log when using dma_alloc_coherent().
> [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> ddr_phys_addr=0x0000000001048000
> ...
> 
> > 
> > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > amount
> > > of
> > > memory to transfer firmware fragment to the ISH.
> > > 
> > > On Arcada chromebook, after the patch the warning:
> > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > (GFP_KERNEL).  Fix your code!"
> > > is gone. The ISH firmware is loaded properly and we can interact
> > > with
> > > the ISH:
> > > > ectool  --name cros_ish version
> > > ...
> > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > 05:37:47
> > > @localhost
> > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > @localhost
> > > 
> > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > loader
> > > client driver")
> > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > Cc: stable@vger.kernel.org
> > > ---
> > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------------
> > > ----
> > > --
> > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > 
> > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > index e24988586710..16aa030af845 100644
> > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct
> > > ishtp_cl_data *client_data,
> > >          */
> > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > 
> > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > GFP_DMA32);
> > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > &dma_buf_phy, GFP_KERNEL);
> > >         if (!dma_buf) {
> > >                 client_data->flag_retry = true;
> > >                 return -ENOMEM;
> > >         }
> > > 
> > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > payload_max_size,
> > > -                                    DMA_TO_DEVICE);
> > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > failed\n");
> > > -               client_data->flag_retry = true;
> > > -               rv = -ENOMEM;
> > > -               goto end_err_dma_buf_release;
> > > -       }
> > > -
> > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > LOADER_CMD_XFER_FRAGMENT;
> > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > LOADER_XFER_MODE_DIRECT_DMA;
> > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > ishtp_cl_data *client_data,
> > >                 ldr_xfer_dma_frag.fragment.size = fragment_size;
> > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > fragment_size);
> > > 
> > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > -                                          payload_max_size,
> > > -                                          DMA_TO_DEVICE);
> > > -
> > Any reason for removal of sync?
> It is not needed since we are using dma_alloc_coherent(). From [1]:
> """
> void *
> dma_alloc_coherent(struct device *dev, size_t size,
>    dma_addr_t *dma_handle, gfp_t flag)
> 
> Consistent memory is memory for which a write by either the device or
> the processor can immediately be read by the processor or device
> without having to worry about caching effects.  (You may however need
> to make sure to flush the processor's write buffers before telling
> devices to read that memory.)
> 
> This routine allocates a region of <size> bytes of consistent memory.
> """'
> 
 I checked with some dma folks. This call may still be required for
some device. Most likely not as this is on chip device.
What happens if you leave this call? I worry for regression on some old
systems.

Thanks,
Srinivas



> > Thanks,
> > Srinivas
> > 
> > > -               /*
> > > -                * Flush cache here because the
> > > dma_sync_single_for_device()
> > > -                * does not do for x86.
> > > -                */
> > > +               /* Flush cache to be sure the data is in main
> > > memory.
> > > */
> > >                 clflush_cache_range(dma_buf, payload_max_size);
> > > 
> > >                 dev_dbg(cl_data_to_dev(client_data),
> > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > ishtp_cl_data *client_data,
> > >                 fragment_offset += fragment_size;
> > >         }
> > > 
> > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > DMA_TO_DEVICE);
> > > -       kfree(dma_buf);
> > > -       return 0;
> > > -
> > >  end_err_resp_buf_release:
> > > -       /* Free ISH buffer if not done already, in error case */
> > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > DMA_TO_DEVICE);
> > > -end_err_dma_buf_release:
> > > -       kfree(dma_buf);
> > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > dma_buf_phy);
> > >         return rv;
> > >  }
> > > 
> > 
> 
> [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
Gwendal Grignou Feb. 10, 2022, 6:34 p.m. UTC | #4
On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > <srinivas.pandruvada@linux.intel.com> wrote:
> > >
> > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> > > > allocator will ignore the attribute.
> > > >
> > > Looks like there is new error flow added here for this flag.
> > > Is this just removing GFP_DMA32 not enough?
> > It was already ignored. It is not enough and I don't know why since
> > the virtual and physical addresses are in the same range:
> >
> > With using kmalloc/dma_single_sync:
> > 5.10 (firmware not loading)
> > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > kmalloc/dma_map_single: v:0xffff91531ae88000, phy:0x0000000085afe000
> > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > ddr_phys_addr=0x0000000085afe000
> > ...
> >
> > 4.19 (firmware loading)
> > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > kmalloc/dma_map_single: v:0xffff88c145480000, phy:0x0000000085480000
> > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > ddr_phys_addr=0x0000000085480000
> > ...
> >
> > I also considered flushing the CPU cache before the
> > dma_sync_single_for_device call, and calling
> > dma_sync_single_for_cpu()
> > after loader_cl_send() to be allowed to write into the buffer again.
> > But these did not help either.
> >
> > But using dma_alloc_coherent() clearly works and its simpler API
> > makes
> That is OK.
>
> > it more appropriate for the task at hand.
> >
> > For reference, the same log when using dma_alloc_coherent().
> > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > ddr_phys_addr=0x0000000001048000
> > ...
> >
> > >
> > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > amount
> > > > of
> > > > memory to transfer firmware fragment to the ISH.
> > > >
> > > > On Arcada chromebook, after the patch the warning:
> > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > (GFP_KERNEL).  Fix your code!"
> > > > is gone. The ISH firmware is loaded properly and we can interact
> > > > with
> > > > the ISH:
> > > > > ectool  --name cros_ish version
> > > > ...
> > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > 05:37:47
> > > > @localhost
> > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > @localhost
> > > >
> > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > loader
> > > > client driver")
> > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > Cc: stable@vger.kernel.org
> > > > ---
> > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------------
> > > > ----
> > > > --
> > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > >
> > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > index e24988586710..16aa030af845 100644
> > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct
> > > > ishtp_cl_data *client_data,
> > > >          */
> > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > >
> > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > GFP_DMA32);
> > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > &dma_buf_phy, GFP_KERNEL);
> > > >         if (!dma_buf) {
> > > >                 client_data->flag_retry = true;
> > > >                 return -ENOMEM;
> > > >         }
> > > >
> > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > payload_max_size,
> > > > -                                    DMA_TO_DEVICE);
> > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > failed\n");
> > > > -               client_data->flag_retry = true;
> > > > -               rv = -ENOMEM;
> > > > -               goto end_err_dma_buf_release;
> > > > -       }
> > > > -
> > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > LOADER_CMD_XFER_FRAGMENT;
> > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > ishtp_cl_data *client_data,
> > > >                 ldr_xfer_dma_frag.fragment.size = fragment_size;
> > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > fragment_size);
> > > >
> > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > -                                          payload_max_size,
> > > > -                                          DMA_TO_DEVICE);
> > > > -
> > > Any reason for removal of sync?
> > It is not needed since we are using dma_alloc_coherent(). From [1]:
> > """
> > void *
> > dma_alloc_coherent(struct device *dev, size_t size,
> >    dma_addr_t *dma_handle, gfp_t flag)
> >
> > Consistent memory is memory for which a write by either the device or
> > the processor can immediately be read by the processor or device
> > without having to worry about caching effects.  (You may however need
> > to make sure to flush the processor's write buffers before telling
> > devices to read that memory.)
> >
> > This routine allocates a region of <size> bytes of consistent memory.
> > """'
> >
>  I checked with some dma folks. This call may still be required for
> some device. Most likely not as this is on chip device.
> What happens if you leave this call? I worry for regression on some old
> systems.
I truly believe this call is unnecessary: From the docs, dma_sync_...
is only for streaming DMA mappings, not for coherent memory. Another
link here: http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
Looking at older drivers, for instance exec_drive_command() in
driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
dma_alloc_ and dma_free_.
In a simple driver - drivers/scsi/advansys.c - that only uses coherent
DMA memory, no dma_sync calls are used.
As long as we have a memory barrier before sending the packet
downstream, we are covered. I rebooted my machine in a loop without
error overnight.

Gwendal.

>
> Thanks,
> Srinivas
>
>
>
> > > Thanks,
> > > Srinivas
> > >
> > > > -               /*
> > > > -                * Flush cache here because the
> > > > dma_sync_single_for_device()
> > > > -                * does not do for x86.
> > > > -                */
> > > > +               /* Flush cache to be sure the data is in main
> > > > memory.
> > > > */
> > > >                 clflush_cache_range(dma_buf, payload_max_size);
> > > >
> > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > ishtp_cl_data *client_data,
> > > >                 fragment_offset += fragment_size;
> > > >         }
> > > >
> > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > DMA_TO_DEVICE);
> > > > -       kfree(dma_buf);
> > > > -       return 0;
> > > > -
> > > >  end_err_resp_buf_release:
> > > > -       /* Free ISH buffer if not done already, in error case */
> > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > DMA_TO_DEVICE);
> > > > -end_err_dma_buf_release:
> > > > -       kfree(dma_buf);
> > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > dma_buf_phy);
> > > >         return rv;
> > > >  }
> > > >
> > >
> >
> > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
>
srinivas pandruvada Feb. 10, 2022, 7:39 p.m. UTC | #5
On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > 
> > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > 
> > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed,
> > > > > the
> > > > > allocator will ignore the attribute.
> > > > > 
> > > > Looks like there is new error flow added here for this flag.
> > > > Is this just removing GFP_DMA32 not enough?
> > > It was already ignored. It is not enough and I don't know why
> > > since
> > > the virtual and physical addresses are in the same range:
> > > 
> > > With using kmalloc/dma_single_sync:
> > > 5.10 (firmware not loading)
> > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > phy:0x0000000085afe000
> > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085afe000
> > > ...
> > > 
> > > 4.19 (firmware loading)
> > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > phy:0x0000000085480000
> > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085480000
> > > ...
> > > 
> > > I also considered flushing the CPU cache before the
> > > dma_sync_single_for_device call, and calling
> > > dma_sync_single_for_cpu()
> > > after loader_cl_send() to be allowed to write into the buffer
> > > again.
> > > But these did not help either.
> > > 
> > > But using dma_alloc_coherent() clearly works and its simpler API
> > > makes
> > That is OK.
> > 
> > > it more appropriate for the task at hand.
> > > 
> > > For reference, the same log when using dma_alloc_coherent().
> > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000001048000
> > > ...
> > > 
> > > > 
> > > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > > amount
> > > > > of
> > > > > memory to transfer firmware fragment to the ISH.
> > > > > 
> > > > > On Arcada chromebook, after the patch the warning:
> > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > (GFP_KERNEL).  Fix your code!"
> > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > interact
> > > > > with
> > > > > the ISH:
> > > > > > ectool  --name cros_ish version
> > > > > ...
> > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > 05:37:47
> > > > > @localhost
> > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > @localhost
> > > > > 
> > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > > loader
> > > > > client driver")
> > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > Cc: stable@vger.kernel.org
> > > > > ---
> > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++--------
> > > > > ----
> > > > > ----
> > > > > --
> > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > index e24988586710..16aa030af845 100644
> > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > @@ -661,21 +661,12 @@ static int
> > > > > ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >          */
> > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > 
> > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > GFP_DMA32);
> > > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > > &dma_buf_phy, GFP_KERNEL);
> > > > >         if (!dma_buf) {
> > > > >                 client_data->flag_retry = true;
> > > > >                 return -ENOMEM;
> > > > >         }
> > > > > 
> > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > payload_max_size,
> > > > > -                                    DMA_TO_DEVICE);
> > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > > failed\n");
> > > > > -               client_data->flag_retry = true;
> > > > > -               rv = -ENOMEM;
> > > > > -               goto end_err_dma_buf_release;
> > > > > -       }
> > > > > -
> > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > fragment_size;
> > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > fragment_size);
> > > > > 
> > > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > > -                                          payload_max_size,
> > > > > -                                          DMA_TO_DEVICE);
> > > > > -
> > > > Any reason for removal of sync?
> > > It is not needed since we are using dma_alloc_coherent(). From
> > > [1]:
> > > """
> > > void *
> > > dma_alloc_coherent(struct device *dev, size_t size,
> > >    dma_addr_t *dma_handle, gfp_t flag)
> > > 
> > > Consistent memory is memory for which a write by either the
> > > device or
> > > the processor can immediately be read by the processor or device
> > > without having to worry about caching effects.  (You may however
> > > need
> > > to make sure to flush the processor's write buffers before
> > > telling
> > > devices to read that memory.)
> > > 
> > > This routine allocates a region of <size> bytes of consistent
> > > memory.
> > > """'
> > > 
> >  I checked with some dma folks. This call may still be required for
> > some device. Most likely not as this is on chip device.
> > What happens if you leave this call? I worry for regression on some
> > old
> > systems.
> I truly believe this call is unnecessary: From the docs, dma_sync_...
> is only for streaming DMA mappings, not for coherent memory. Another
> link here:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> Looking at older drivers, for instance exec_drive_command() in
> driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> dma_alloc_ and dma_free_.
> In a simple driver - drivers/scsi/advansys.c - that only uses
> coherent
> DMA memory, no dma_sync calls are used.
> As long as we have a memory barrier before sending the packet
> downstream, we are covered. I rebooted my machine in a loop without
> error overnight.
> 
I think sync may not be required here. But 
there is an additional step you miss if you don't call sync, 
irrespective of "(!dev_is_dma_coherent(dev))" in the call chain of this
sync.

	if (unlikely(is_swiotlb_buffer(dev, paddr)))
		swiotlb_sync_single_for_device(dev, paddr, size, dir);

So when SWIOTLB is enabled with swiotlb=force and bypass IOMMU, I don't
know the net affect of this.
So I need some further debug on this.

Thanks,
Srinivas

> Gwendal.
> 
> > 
> > Thanks,
> > Srinivas
> > 
> > 
> > 
> > > > Thanks,
> > > > Srinivas
> > > > 
> > > > > -               /*
> > > > > -                * Flush cache here because the
> > > > > dma_sync_single_for_device()
> > > > > -                * does not do for x86.
> > > > > -                */
> > > > > +               /* Flush cache to be sure the data is in main
> > > > > memory.
> > > > > */
> > > > >                 clflush_cache_range(dma_buf,
> > > > > payload_max_size);
> > > > > 
> > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 fragment_offset += fragment_size;
> > > > >         }
> > > > > 
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -       kfree(dma_buf);
> > > > > -       return 0;
> > > > > -
> > > > >  end_err_resp_buf_release:
> > > > > -       /* Free ISH buffer if not done already, in error case
> > > > > */
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -end_err_dma_buf_release:
> > > > > -       kfree(dma_buf);
> > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > dma_buf_phy);
> > > > >         return rv;
> > > > >  }
> > > > > 
> > > > 
> > > 
> > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> >
srinivas pandruvada Feb. 14, 2022, 5:11 p.m. UTC | #6
On Thu, 2022-02-10 at 11:39 -0800, srinivas pandruvada wrote:
> On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> > On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> > <srinivas.pandruvada@linux.intel.com> wrote:
> > > 
> > > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > > 
> > > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > > Allocating memory with kmalloc and GPF_DMA32 is not
> > > > > > allowed,
> > > > > > the
> > > > > > allocator will ignore the attribute.
> > > > > > 
> > > > > Looks like there is new error flow added here for this flag.
> > > > > Is this just removing GFP_DMA32 not enough?
> > > > It was already ignored. It is not enough and I don't know why
> > > > since
> > > > the virtual and physical addresses are in the same range:
> > > > 
> > > > With using kmalloc/dma_single_sync:
> > > > 5.10 (firmware not loading)
> > > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > > phy:0x0000000085afe000
> > > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085afe000
> > > > ...
> > > > 
> > > > 4.19 (firmware loading)
> > > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > > phy:0x0000000085480000
> > > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085480000
> > > > ...
> > > > 
> > > > I also considered flushing the CPU cache before the
> > > > dma_sync_single_for_device call, and calling
> > > > dma_sync_single_for_cpu()
> > > > after loader_cl_send() to be allowed to write into the buffer
> > > > again.
> > > > But these did not help either.
> > > > 
> > > > But using dma_alloc_coherent() clearly works and its simpler
> > > > API
> > > > makes
> > > That is OK.
> > > 
> > > > it more appropriate for the task at hand.
> > > > 
> > > > For reference, the same log when using dma_alloc_coherent().
> > > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > dma_alloc_coherent: v:0xffff9beb81048000,
> > > > phy:0x0000000001048000
> > > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000001048000
> > > > ...
> > > > 
> > > > > 
> > > > > > Instead, use dma_alloc_coherent() API as we allocate a
> > > > > > small
> > > > > > amount
> > > > > > of
> > > > > > memory to transfer firmware fragment to the ISH.
> > > > > > 
> > > > > > On Arcada chromebook, after the patch the warning:
> > > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > > (GFP_KERNEL).  Fix your code!"
> > > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > > interact
> > > > > > with
> > > > > > the ISH:
> > > > > > > ectool  --name cros_ish version
> > > > > > ...
> > > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > > 05:37:47
> > > > > > @localhost
> > > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > > @localhost
> > > > > > 
> > > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH
> > > > > > firmware
> > > > > > loader
> > > > > > client driver")
> > > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > > Cc: stable@vger.kernel.org
> > > > > > ---
> > > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------
> > > > > > --
> > > > > > ----
> > > > > > ----
> > > > > > --
> > > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > index e24988586710..16aa030af845 100644
> > > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > @@ -661,21 +661,12 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >          */
> > > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > > 
> > > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > > GFP_DMA32);
> > > > > > +       dma_buf = dma_alloc_coherent(devc,
> > > > > > payload_max_size,
> > > > > > &dma_buf_phy, GFP_KERNEL);
> > > > > >         if (!dma_buf) {
> > > > > >                 client_data->flag_retry = true;
> > > > > >                 return -ENOMEM;
> > > > > >         }
> > > > > > 
> > > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > > payload_max_size,
> > > > > > -                                    DMA_TO_DEVICE);
> > > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > > -               dev_err(cl_data_to_dev(client_data), "DMA
> > > > > > map
> > > > > > failed\n");
> > > > > > -               client_data->flag_retry = true;
> > > > > > -               rv = -ENOMEM;
> > > > > > -               goto end_err_dma_buf_release;
> > > > > > -       }
> > > > > > -
> > > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > > @@ -695,14 +686,7 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > > fragment_size;
> > > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > > fragment_size);
> > > > > > 
> > > > > > -               dma_sync_single_for_device(devc,
> > > > > > dma_buf_phy,
> > > > > > -                                         
> > > > > > payload_max_size,
> > > > > > -                                          DMA_TO_DEVICE);
> > > > > > -
> > > > > Any reason for removal of sync?
> > > > It is not needed since we are using dma_alloc_coherent(). From
> > > > [1]:
> > > > """
> > > > void *
> > > > dma_alloc_coherent(struct device *dev, size_t size,
> > > >    dma_addr_t *dma_handle, gfp_t flag)
> > > > 
> > > > Consistent memory is memory for which a write by either the
> > > > device or
> > > > the processor can immediately be read by the processor or
> > > > device
> > > > without having to worry about caching effects.  (You may
> > > > however
> > > > need
> > > > to make sure to flush the processor's write buffers before
> > > > telling
> > > > devices to read that memory.)
> > > > 
> > > > This routine allocates a region of <size> bytes of consistent
> > > > memory.
> > > > """'
> > > > 
> > >  I checked with some dma folks. This call may still be required
> > > for
> > > some device. Most likely not as this is on chip device.
> > > What happens if you leave this call? I worry for regression on
> > > some
> > > old
> > > systems.
> > I truly believe this call is unnecessary: From the docs,
> > dma_sync_...
> > is only for streaming DMA mappings, not for coherent memory.
> > Another
> > link here:
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> > Looking at older drivers, for instance exec_drive_command() in
> > driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> > dma_alloc_ and dma_free_.
> > In a simple driver - drivers/scsi/advansys.c - that only uses
> > coherent
> > DMA memory, no dma_sync calls are used.
> > As long as we have a memory barrier before sending the packet
> > downstream, we are covered. I rebooted my machine in a loop without
> > error overnight.
> > 
> I think sync may not be required here. But 
> there is an additional step you miss if you don't call sync, 
> irrespective of "(!dev_is_dma_coherent(dev))" in the call chain of
> this
> sync.
> 
>         if (unlikely(is_swiotlb_buffer(dev, paddr)))
>                 swiotlb_sync_single_for_device(dev, paddr, size,
> dir);
> 
> So when SWIOTLB is enabled with swiotlb=force and bypass IOMMU, I
> don't
> know the net affect of this.
> So I need some further debug on this.
Looking into this, we will miss
copy the swiotlb buffer back and from original DMA location,
Please try with kernel command line option "swiotlb=force", if this can
still work we can remove this call safely.

Thanks,
Srinivas


> 
> Thanks,
> Srinivas
> 
> > Gwendal.
> > 
> > > 
> > > Thanks,
> > > Srinivas
> > > 
> > > 
> > > 
> > > > > Thanks,
> > > > > Srinivas
> > > > > 
> > > > > > -               /*
> > > > > > -                * Flush cache here because the
> > > > > > dma_sync_single_for_device()
> > > > > > -                * does not do for x86.
> > > > > > -                */
> > > > > > +               /* Flush cache to be sure the data is in
> > > > > > main
> > > > > > memory.
> > > > > > */
> > > > > >                 clflush_cache_range(dma_buf,
> > > > > > payload_max_size);
> > > > > > 
> > > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > > @@ -725,15 +709,8 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 fragment_offset += fragment_size;
> > > > > >         }
> > > > > > 
> > > > > > -       dma_unmap_single(devc, dma_buf_phy,
> > > > > > payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -       kfree(dma_buf);
> > > > > > -       return 0;
> > > > > > -
> > > > > >  end_err_resp_buf_release:
> > > > > > -       /* Free ISH buffer if not done already, in error
> > > > > > case
> > > > > > */
> > > > > > -       dma_unmap_single(devc, dma_buf_phy,
> > > > > > payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -end_err_dma_buf_release:
> > > > > > -       kfree(dma_buf);
> > > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > > dma_buf_phy);
> > > > > >         return rv;
> > > > > >  }
> > > > > > 
> > > > > 
> > > > 
> > > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> > > 
>
srinivas pandruvada March 4, 2022, 6:04 p.m. UTC | #7
On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > 
> > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > 
> > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed,
> > > > > the
> > > > > allocator will ignore the attribute.
> > > > > 
> > > > Looks like there is new error flow added here for this flag.
> > > > Is this just removing GFP_DMA32 not enough?
> > > It was already ignored. It is not enough and I don't know why
> > > since
> > > the virtual and physical addresses are in the same range:
> > > 
> > > With using kmalloc/dma_single_sync:
> > > 5.10 (firmware not loading)
> > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > phy:0x0000000085afe000
> > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085afe000
> > > ...
> > > 
> > > 4.19 (firmware loading)
> > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > phy:0x0000000085480000
> > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085480000
> > > ...
> > > 
> > > I also considered flushing the CPU cache before the
> > > dma_sync_single_for_device call, and calling
> > > dma_sync_single_for_cpu()
> > > after loader_cl_send() to be allowed to write into the buffer
> > > again.
> > > But these did not help either.
> > > 
> > > But using dma_alloc_coherent() clearly works and its simpler API
> > > makes
> > That is OK.
> > 
> > > it more appropriate for the task at hand.
> > > 
> > > For reference, the same log when using dma_alloc_coherent().
> > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000001048000
> > > ...
> > > 
> > > > 
> > > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > > amount
> > > > > of
> > > > > memory to transfer firmware fragment to the ISH.
> > > > > 
> > > > > On Arcada chromebook, after the patch the warning:
> > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > (GFP_KERNEL).  Fix your code!"
> > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > interact
> > > > > with
> > > > > the ISH:
> > > > > > ectool  --name cros_ish version
> > > > > ...
> > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > 05:37:47
> > > > > @localhost
> > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > @localhost
> > > > > 
> > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > > loader
> > > > > client driver")
> > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > Cc: stable@vger.kernel.org
> > > > > ---
> > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++--------
> > > > > ----
> > > > > ----
> > > > > --
> > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > index e24988586710..16aa030af845 100644
> > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > @@ -661,21 +661,12 @@ static int
> > > > > ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >          */
> > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > 
> > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > GFP_DMA32);
> > > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > > &dma_buf_phy, GFP_KERNEL);
> > > > >         if (!dma_buf) {
> > > > >                 client_data->flag_retry = true;
> > > > >                 return -ENOMEM;
> > > > >         }
> > > > > 
> > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > payload_max_size,
> > > > > -                                    DMA_TO_DEVICE);
> > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > > failed\n");
> > > > > -               client_data->flag_retry = true;
> > > > > -               rv = -ENOMEM;
> > > > > -               goto end_err_dma_buf_release;
> > > > > -       }
> > > > > -
> > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > fragment_size;
> > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > fragment_size);
> > > > > 
> > > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > > -                                          payload_max_size,
> > > > > -                                          DMA_TO_DEVICE);
> > > > > -
> > > > Any reason for removal of sync?
> > > It is not needed since we are using dma_alloc_coherent(). From
> > > [1]:
> > > """
> > > void *
> > > dma_alloc_coherent(struct device *dev, size_t size,
> > >    dma_addr_t *dma_handle, gfp_t flag)
> > > 
> > > Consistent memory is memory for which a write by either the
> > > device or
> > > the processor can immediately be read by the processor or device
> > > without having to worry about caching effects.  (You may however
> > > need
> > > to make sure to flush the processor's write buffers before
> > > telling
> > > devices to read that memory.)
> > > 
> > > This routine allocates a region of <size> bytes of consistent
> > > memory.
> > > """'
> > > 
> >  I checked with some dma folks. This call may still be required for
> > some device. Most likely not as this is on chip device.
> > What happens if you leave this call? I worry for regression on some
> > old
> > systems.
I couldn't find any Chrome device to experiment.
Please try with "swiotlb=force" and if it works, then this if fine to
take the patch as is.

Thanks,
Srinivas

> I truly believe this call is unnecessary: From the docs, dma_sync_...
> is only for streaming DMA mappings, not for coherent memory. Another
> link here:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> Looking at older drivers, for instance exec_drive_command() in
> driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> dma_alloc_ and dma_free_.
> In a simple driver - drivers/scsi/advansys.c - that only uses
> coherent
> DMA memory, no dma_sync calls are used.
> As long as we have a memory barrier before sending the packet
> downstream, we are covered. I rebooted my machine in a loop without
> error overnight.
> 
> Gwendal.
> 
> > 
> > Thanks,
> > Srinivas
> > 
> > 
> > 
> > > > Thanks,
> > > > Srinivas
> > > > 
> > > > > -               /*
> > > > > -                * Flush cache here because the
> > > > > dma_sync_single_for_device()
> > > > > -                * does not do for x86.
> > > > > -                */
> > > > > +               /* Flush cache to be sure the data is in main
> > > > > memory.
> > > > > */
> > > > >                 clflush_cache_range(dma_buf,
> > > > > payload_max_size);
> > > > > 
> > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 fragment_offset += fragment_size;
> > > > >         }
> > > > > 
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -       kfree(dma_buf);
> > > > > -       return 0;
> > > > > -
> > > > >  end_err_resp_buf_release:
> > > > > -       /* Free ISH buffer if not done already, in error case
> > > > > */
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -end_err_dma_buf_release:
> > > > > -       kfree(dma_buf);
> > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > dma_buf_phy);
> > > > >         return rv;
> > > > >  }
> > > > > 
> > > > 
> > > 
> > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> >
Gwendal Grignou March 11, 2022, 4:50 p.m. UTC | #8
On Fri, Mar 4, 2022 at 10:05 AM srinivas pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> > On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> > <srinivas.pandruvada@linux.intel.com> wrote:
> > >
> > > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed,
> > > > > > the
> > > > > > allocator will ignore the attribute.
> > > > > >
> > > > > Looks like there is new error flow added here for this flag.
> > > > > Is this just removing GFP_DMA32 not enough?
> > > > It was already ignored. It is not enough and I don't know why
> > > > since
> > > > the virtual and physical addresses are in the same range:
> > > >
> > > > With using kmalloc/dma_single_sync:
> > > > 5.10 (firmware not loading)
> > > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > > phy:0x0000000085afe000
> > > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085afe000
> > > > ...
> > > >
> > > > 4.19 (firmware loading)
> > > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > > phy:0x0000000085480000
> > > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085480000
> > > > ...
> > > >
> > > > I also considered flushing the CPU cache before the
> > > > dma_sync_single_for_device call, and calling
> > > > dma_sync_single_for_cpu()
> > > > after loader_cl_send() to be allowed to write into the buffer
> > > > again.
> > > > But these did not help either.
> > > >
> > > > But using dma_alloc_coherent() clearly works and its simpler API
> > > > makes
> > > That is OK.
> > >
> > > > it more appropriate for the task at hand.
> > > >
> > > > For reference, the same log when using dma_alloc_coherent().
> > > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000001048000
> > > > ...
> > > >
> > > > >
> > > > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > > > amount
> > > > > > of
> > > > > > memory to transfer firmware fragment to the ISH.
> > > > > >
> > > > > > On Arcada chromebook, after the patch the warning:
> > > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > > (GFP_KERNEL).  Fix your code!"
> > > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > > interact
> > > > > > with
> > > > > > the ISH:
> > > > > > > ectool  --name cros_ish version
> > > > > > ...
> > > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > > 05:37:47
> > > > > > @localhost
> > > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > > @localhost
> > > > > >
> > > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > > > loader
> > > > > > client driver")
> > > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > > Cc: stable@vger.kernel.org
> > > > > > ---
> > > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++--------
> > > > > > ----
> > > > > > ----
> > > > > > --
> > > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > index e24988586710..16aa030af845 100644
> > > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > @@ -661,21 +661,12 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >          */
> > > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > >
> > > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > > GFP_DMA32);
> > > > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > > > &dma_buf_phy, GFP_KERNEL);
> > > > > >         if (!dma_buf) {
> > > > > >                 client_data->flag_retry = true;
> > > > > >                 return -ENOMEM;
> > > > > >         }
> > > > > >
> > > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > > payload_max_size,
> > > > > > -                                    DMA_TO_DEVICE);
> > > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > > > failed\n");
> > > > > > -               client_data->flag_retry = true;
> > > > > > -               rv = -ENOMEM;
> > > > > > -               goto end_err_dma_buf_release;
> > > > > > -       }
> > > > > > -
> > > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > > fragment_size;
> > > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > > fragment_size);
> > > > > >
> > > > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > > > -                                          payload_max_size,
> > > > > > -                                          DMA_TO_DEVICE);
> > > > > > -
> > > > > Any reason for removal of sync?
> > > > It is not needed since we are using dma_alloc_coherent(). From
> > > > [1]:
> > > > """
> > > > void *
> > > > dma_alloc_coherent(struct device *dev, size_t size,
> > > >    dma_addr_t *dma_handle, gfp_t flag)
> > > >
> > > > Consistent memory is memory for which a write by either the
> > > > device or
> > > > the processor can immediately be read by the processor or device
> > > > without having to worry about caching effects.  (You may however
> > > > need
> > > > to make sure to flush the processor's write buffers before
> > > > telling
> > > > devices to read that memory.)
> > > >
> > > > This routine allocates a region of <size> bytes of consistent
> > > > memory.
> > > > """'
> > > >
> > >  I checked with some dma folks. This call may still be required for
> > > some device. Most likely not as this is on chip device.
> > > What happens if you leave this call? I worry for regression on some
> > > old
> > > systems.
> I couldn't find any Chrome device to experiment.
> Please try with "swiotlb=force" and if it works, then this if fine to
> take the patch as is.
It works. Used an "arcada" [from dmidecode]
System Information
        Manufacturer: Dell Inc.
        Product Name: Arcada

Added "swiotlb=force" to the kernel command line using
'/usr/share/vboot/bin/make_dev_ssd.sh -i  /dev/nvme0n1 --partitions 4
--edit_config`.
Check with 'cat /proc/cmdline' the option is added and tested multiple
reboot and shutdown/power on.

Gwendal.

>
> Thanks,
> Srinivas
>
> > I truly believe this call is unnecessary: From the docs, dma_sync_...
> > is only for streaming DMA mappings, not for coherent memory. Another
> > link here:
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> > Looking at older drivers, for instance exec_drive_command() in
> > driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> > dma_alloc_ and dma_free_.
> > In a simple driver - drivers/scsi/advansys.c - that only uses
> > coherent
> > DMA memory, no dma_sync calls are used.
> > As long as we have a memory barrier before sending the packet
> > downstream, we are covered. I rebooted my machine in a loop without
> > error overnight.
> >
> > Gwendal.
> >
> > >
> > > Thanks,
> > > Srinivas
> > >
> > >
> > >
> > > > > Thanks,
> > > > > Srinivas
> > > > >
> > > > > > -               /*
> > > > > > -                * Flush cache here because the
> > > > > > dma_sync_single_for_device()
> > > > > > -                * does not do for x86.
> > > > > > -                */
> > > > > > +               /* Flush cache to be sure the data is in main
> > > > > > memory.
> > > > > > */
> > > > > >                 clflush_cache_range(dma_buf,
> > > > > > payload_max_size);
> > > > > >
> > > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 fragment_offset += fragment_size;
> > > > > >         }
> > > > > >
> > > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -       kfree(dma_buf);
> > > > > > -       return 0;
> > > > > > -
> > > > > >  end_err_resp_buf_release:
> > > > > > -       /* Free ISH buffer if not done already, in error case
> > > > > > */
> > > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -end_err_dma_buf_release:
> > > > > > -       kfree(dma_buf);
> > > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > > dma_buf_phy);
> > > > > >         return rv;
> > > > > >  }
> > > > > >
> > > > >
> > > >
> > > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> > >
>
srinivas pandruvada March 11, 2022, 9:22 p.m. UTC | #9
On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> allocator will ignore the attribute.
> 
> Instead, use dma_alloc_coherent() API as we allocate a small amount of
> memory to transfer firmware fragment to the ISH.
> 
> On Arcada chromebook, after the patch the warning:
> "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> (GFP_KERNEL).  Fix your code!"
> is gone. The ISH firmware is loaded properly and we can interact with
> the ISH:
> > ectool  --name cros_ish version
> ...
> Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47
> @localhost
> Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost
> 
> Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader
> client driver")
> Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> Cc: stable@vger.kernel.org
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

> ---
>  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------------------
>  1 file changed, 3 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> index e24988586710..16aa030af845 100644
> --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>          */
>         payload_max_size &= ~(L1_CACHE_BYTES - 1);
>  
> -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32);
> +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> &dma_buf_phy, GFP_KERNEL);
>         if (!dma_buf) {
>                 client_data->flag_retry = true;
>                 return -ENOMEM;
>         }
>  
> -       dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size,
> -                                    DMA_TO_DEVICE);
> -       if (dma_mapping_error(devc, dma_buf_phy)) {
> -               dev_err(cl_data_to_dev(client_data), "DMA map
> failed\n");
> -               client_data->flag_retry = true;
> -               rv = -ENOMEM;
> -               goto end_err_dma_buf_release;
> -       }
> -
>         ldr_xfer_dma_frag.fragment.hdr.command =
> LOADER_CMD_XFER_FRAGMENT;
>         ldr_xfer_dma_frag.fragment.xfer_mode =
> LOADER_XFER_MODE_DIRECT_DMA;
>         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>                 ldr_xfer_dma_frag.fragment.size = fragment_size;
>                 memcpy(dma_buf, &fw->data[fragment_offset],
> fragment_size);
>  
> -               dma_sync_single_for_device(devc, dma_buf_phy,
> -                                          payload_max_size,
> -                                          DMA_TO_DEVICE);
> -
> -               /*
> -                * Flush cache here because the
> dma_sync_single_for_device()
> -                * does not do for x86.
> -                */
> +               /* Flush cache to be sure the data is in main memory.
> */
>                 clflush_cache_range(dma_buf, payload_max_size);
>  
>                 dev_dbg(cl_data_to_dev(client_data),
> @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>                 fragment_offset += fragment_size;
>         }
>  
> -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> DMA_TO_DEVICE);
> -       kfree(dma_buf);
> -       return 0;
> -
>  end_err_resp_buf_release:
> -       /* Free ISH buffer if not done already, in error case */
> -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> DMA_TO_DEVICE);
> -end_err_dma_buf_release:
> -       kfree(dma_buf);
> +       dma_free_coherent(devc, payload_max_size, dma_buf,
> dma_buf_phy);
>         return rv;
>  }
>
Jiri Kosina March 14, 2022, 9:39 a.m. UTC | #10
On Tue, 8 Feb 2022, Gwendal Grignou wrote:

> Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> allocator will ignore the attribute.
> 
> Instead, use dma_alloc_coherent() API as we allocate a small amount of
> memory to transfer firmware fragment to the ISH.
> 
> On Arcada chromebook, after the patch the warning:
> "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0 (GFP_KERNEL).  Fix your code!"
> is gone. The ISH firmware is loaded properly and we can interact with
> the ISH:
> > ectool  --name cros_ish version
> ...
> Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47 @localhost
> Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost
> 
> Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader client driver")
> Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> Cc: stable@vger.kernel.org

Applied, thanks.
diff mbox series

Patch

diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index e24988586710..16aa030af845 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -661,21 +661,12 @@  static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
 	 */
 	payload_max_size &= ~(L1_CACHE_BYTES - 1);
 
-	dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32);
+	dma_buf = dma_alloc_coherent(devc, payload_max_size, &dma_buf_phy, GFP_KERNEL);
 	if (!dma_buf) {
 		client_data->flag_retry = true;
 		return -ENOMEM;
 	}
 
-	dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size,
-				     DMA_TO_DEVICE);
-	if (dma_mapping_error(devc, dma_buf_phy)) {
-		dev_err(cl_data_to_dev(client_data), "DMA map failed\n");
-		client_data->flag_retry = true;
-		rv = -ENOMEM;
-		goto end_err_dma_buf_release;
-	}
-
 	ldr_xfer_dma_frag.fragment.hdr.command = LOADER_CMD_XFER_FRAGMENT;
 	ldr_xfer_dma_frag.fragment.xfer_mode = LOADER_XFER_MODE_DIRECT_DMA;
 	ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
@@ -695,14 +686,7 @@  static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
 		ldr_xfer_dma_frag.fragment.size = fragment_size;
 		memcpy(dma_buf, &fw->data[fragment_offset], fragment_size);
 
-		dma_sync_single_for_device(devc, dma_buf_phy,
-					   payload_max_size,
-					   DMA_TO_DEVICE);
-
-		/*
-		 * Flush cache here because the dma_sync_single_for_device()
-		 * does not do for x86.
-		 */
+		/* Flush cache to be sure the data is in main memory. */
 		clflush_cache_range(dma_buf, payload_max_size);
 
 		dev_dbg(cl_data_to_dev(client_data),
@@ -725,15 +709,8 @@  static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
 		fragment_offset += fragment_size;
 	}
 
-	dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE);
-	kfree(dma_buf);
-	return 0;
-
 end_err_resp_buf_release:
-	/* Free ISH buffer if not done already, in error case */
-	dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE);
-end_err_dma_buf_release:
-	kfree(dma_buf);
+	dma_free_coherent(devc, payload_max_size, dma_buf, dma_buf_phy);
 	return rv;
 }