Message ID | 20200504092611.9798-30-laurent.pinchart@ideasonboard.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Drivers for the BCM283x CSI-2/CCP2 receiver and ISP | expand |
Hi Phil, Laurent, On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote: > From: Phil Elwell <phil@raspberrypi.org> > > Conditional on a new compatible string, change the pagelist encoding > such that the top 24 bits are the pfn, leaving 8 bits for run length > (-1). > > Signed-off-by: Phil Elwell <phil@raspberrypi.org> > Signed-off-by: Jacopo Mondi <jacopo@jmondi.org> > --- > .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++----- > .../interface/vchiq_arm/vchiq_arm.c | 6 ++ > .../interface/vchiq_arm/vchiq_arm.h | 1 + > 3 files changed, 74 insertions(+), 21 deletions(-) > > diff --git > a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > index 3e422a7eb3f1..ecec84ad4345 100644 > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > @@ -16,6 +16,8 @@ > #include <soc/bcm2835/raspberrypi-firmware.h> > > #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) > +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : (u32)(x)) > +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1) As I commented earlier, this is a sign your dma-ranges are wrong, most of the code below reimplements what is already done by dma-direct (see kernel/dma/direct.c). Once properly setup, you should be able to use whatever phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs. Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this also applies there. Regards, Nicolas > #include "vchiq_arm.h" > #include "vchiq_connected.h" > @@ -62,6 +64,7 @@ static void __iomem *g_regs; > */ > static unsigned int g_cache_line_size = 32; > static struct dma_pool *g_dma_pool; > +static unsigned int g_use_36bit_addrs = 0; > static unsigned int g_fragments_size; > static char *g_fragments_base; > static char *g_free_fragments; > @@ -104,6 +107,8 @@ int vchiq_platform_init(struct platform_device *pdev, > struct vchiq_state *state) > g_cache_line_size = drvdata->cache_line_size; > g_fragments_size = 2 * g_cache_line_size; > > + g_use_36bit_addrs = (dev->dma_pfn_offset == 0); > + > /* Allocate space for the channels in coherent memory */ > slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE); > frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS); > @@ -115,14 +120,21 @@ int vchiq_platform_init(struct platform_device *pdev, > struct vchiq_state *state) > return -ENOMEM; > } > > + if (!IS_VC_SAFE(slot_phys)) { > + dev_err(dev, "allocated DMA memory %pad is not VC-safe\n", > + &slot_phys); > + return -ENOMEM; > + } > + > WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0); > + channelbase = VC_SAFE(slot_phys); > > vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size); > if (!vchiq_slot_zero) > return -EINVAL; > > vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] = > - (int)slot_phys + slot_mem_size; > + channelbase + slot_mem_size; > vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] = > MAX_FRAGMENTS; > > @@ -155,7 +167,6 @@ int vchiq_platform_init(struct platform_device *pdev, > struct vchiq_state *state) > } > > /* Send the base address of the slots to VideoCore */ > - channelbase = slot_phys; > err = rpi_firmware_property(fw, RPI_FIRMWARE_VCHIQ_INIT, > &channelbase, sizeof(channelbase)); > if (err || channelbase) { > @@ -241,7 +252,7 @@ vchiq_prepare_bulk_data(struct vchiq_bulk *bulk, void > *offset, int size, > if (!pagelistinfo) > return VCHIQ_ERROR; > > - bulk->data = (void *)(unsigned long)pagelistinfo->dma_addr; > + bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr); > > /* > * Store the pagelistinfo address in remote_data, > @@ -475,25 +486,60 @@ create_pagelist(char __user *buf, size_t count, unsigned > short type) > > /* Combine adjacent blocks for performance */ > k = 0; > - for_each_sg(scatterlist, sg, dma_buffers, i) { > - u32 len = sg_dma_len(sg); > - u32 addr = sg_dma_address(sg); > + if (g_use_36bit_addrs) { > + for_each_sg(scatterlist, sg, dma_buffers, i) { > + u32 len = sg_dma_len(sg); > + u64 addr = sg_dma_address(sg); > + u32 page_id = (u32)((addr >> 4) & ~0xff); > + u32 sg_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; > > - /* Note: addrs is the address + page_count - 1 > - * The firmware expects blocks after the first to be page- > - * aligned and a multiple of the page size > - */ > - WARN_ON(len == 0); > - WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); > - WARN_ON(i && (addr & ~PAGE_MASK)); > - if (k > 0 && > - ((addrs[k - 1] & PAGE_MASK) + > - (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) > - == (addr & PAGE_MASK)) > - addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT); > - else > - addrs[k++] = (addr & PAGE_MASK) | > - (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1); > + /* Note: addrs is the address + page_count - 1 > + * The firmware expects blocks after the first to be > page- > + * aligned and a multiple of the page size > + */ > + WARN_ON(len == 0); > + WARN_ON(i && > + (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); > + WARN_ON(i && (addr & ~PAGE_MASK)); > + WARN_ON(upper_32_bits(addr) > 0xf); > + if (k > 0 && > + ((addrs[k - 1] & ~0xff) + > + (((addrs[k - 1] & 0xff) + 1) << 8) > + == page_id)) { > + u32 inc_pages = min(sg_pages, > + 0xff - (addrs[k - 1] & > 0xff)); > + addrs[k - 1] += inc_pages; > + page_id += inc_pages << 8; > + sg_pages -= inc_pages; > + } > + while (sg_pages) { > + u32 inc_pages = min(sg_pages, 0x100u); > + addrs[k++] = page_id | (inc_pages - 1); > + page_id += inc_pages << 8; > + sg_pages -= inc_pages; > + } > + } > + } else { > + for_each_sg(scatterlist, sg, dma_buffers, i) { > + u32 len = sg_dma_len(sg); > + u32 addr = VC_SAFE(sg_dma_address(sg)); > + u32 new_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; > + > + /* Note: addrs is the address + page_count - 1 > + * The firmware expects blocks after the first to be > page- > + * aligned and a multiple of the page size > + */ > + WARN_ON(len == 0); > + WARN_ON(i && (i != (dma_buffers - 1)) && (len & > ~PAGE_MASK)); > + WARN_ON(i && (addr & ~PAGE_MASK)); > + if (k > 0 && > + ((addrs[k - 1] & PAGE_MASK) + > + (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) > + == (addr & PAGE_MASK)) > + addrs[k - 1] += new_pages; > + else > + addrs[k++] = (addr & PAGE_MASK) | (new_pages - > 1); > + } > } > > /* Partial cache lines (fragments) require special measures */ > diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > index d1a556f16499..dd3c8f829daa 100644 > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > @@ -117,6 +117,11 @@ static struct vchiq_drvdata bcm2836_drvdata = { > .cache_line_size = 64, > }; > > +static struct vchiq_drvdata bcm2711_drvdata = { > + .cache_line_size = 64, > + .use_36bit_addrs = true, > +}; > + > static const char *const ioctl_names[] = { > "CONNECT", > "SHUTDOWN", > @@ -2710,6 +2715,7 @@ void vchiq_platform_conn_state_changed(struct > vchiq_state *state, > static const struct of_device_id vchiq_of_match[] = { > { .compatible = "brcm,bcm2835-vchiq", .data = &bcm2835_drvdata }, > { .compatible = "brcm,bcm2836-vchiq", .data = &bcm2836_drvdata }, > + { .compatible = "brcm,bcm2711-vchiq", .data = &bcm2711_drvdata }, > {}, > }; > MODULE_DEVICE_TABLE(of, vchiq_of_match); > diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > index 0784c5002417..f8b1c005af62 100644 > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > @@ -52,6 +52,7 @@ struct vchiq_arm_state { > > struct vchiq_drvdata { > const unsigned int cache_line_size; > + const bool use_36bit_addrs; > struct rpi_firmware *fw; > }; >
Hi Nicolas, On 04/05/2020 18:40, Nicolas Saenz Julienne wrote: > Hi Phil, Laurent, > > On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote: >> From: Phil Elwell <phil@raspberrypi.org> >> >> Conditional on a new compatible string, change the pagelist encoding >> such that the top 24 bits are the pfn, leaving 8 bits for run length >> (-1). >> >> Signed-off-by: Phil Elwell <phil@raspberrypi.org> >> Signed-off-by: Jacopo Mondi <jacopo@jmondi.org> >> --- >> .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++----- >> .../interface/vchiq_arm/vchiq_arm.c | 6 ++ >> .../interface/vchiq_arm/vchiq_arm.h | 1 + >> 3 files changed, 74 insertions(+), 21 deletions(-) >> >> diff --git >> a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c >> b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c >> index 3e422a7eb3f1..ecec84ad4345 100644 >> --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c >> +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c >> @@ -16,6 +16,8 @@ >> #include <soc/bcm2835/raspberrypi-firmware.h> >> #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) >> +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : (u32)(x)) >> +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1) > > As I commented earlier, this is a sign your dma-ranges are wrong, most of the > code below reimplements what is already done by dma-direct (see > kernel/dma/direct.c). Once properly setup, you should be able to use whatever > phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs. > > Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this > also applies there. As I explained in an offline email, the problem is that VCHIQ needs access to two kinds of "DMA" addresses - those suitable for the 40-bit DMA channel, and the 30-bit addresses that the VPU can use. Since each DT node only has access to a single set of DMA ranges, I can't see how to use dma-direct to calculate addreses for everything, but feel free to submit an alternative implementation showing how it should be done. Phil
On Mon, 2020-05-04 at 21:46 +0100, Phil Elwell wrote: > Hi Nicolas, > > On 04/05/2020 18:40, Nicolas Saenz Julienne wrote: > > Hi Phil, Laurent, > > > > On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote: > > > From: Phil Elwell <phil@raspberrypi.org> > > > > > > Conditional on a new compatible string, change the pagelist encoding > > > such that the top 24 bits are the pfn, leaving 8 bits for run length > > > (-1). > > > > > > Signed-off-by: Phil Elwell <phil@raspberrypi.org> > > > Signed-off-by: Jacopo Mondi <jacopo@jmondi.org> > > > --- > > > .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++----- > > > .../interface/vchiq_arm/vchiq_arm.c | 6 ++ > > > .../interface/vchiq_arm/vchiq_arm.h | 1 + > > > 3 files changed, 74 insertions(+), 21 deletions(-) > > > > > > diff --git > > > a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > index 3e422a7eb3f1..ecec84ad4345 100644 > > > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > @@ -16,6 +16,8 @@ > > > #include <soc/bcm2835/raspberrypi-firmware.h> > > > #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) > > > +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : > > > (u32)(x)) > > > +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1) > > > > As I commented earlier, this is a sign your dma-ranges are wrong, most of > > the > > code below reimplements what is already done by dma-direct (see > > kernel/dma/direct.c). Once properly setup, you should be able to use > > whatever > > phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs. > > > > Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this > > also applies there. > > As I explained in an offline email, the problem is that VCHIQ needs access to We discussed this privately, but I wanted to start from scratch, specially as I hope I won't be the only one reviewing this :). > two > kinds of "DMA" addresses - those suitable for the 40-bit DMA channel, and the > 30-bit addresses that the VPU can use. Since each DT node only has access to a > single set of DMA ranges, I can't see how to use dma-direct to calculate > addreses > for everything, but feel free to submit an alternative implementation showing > how > it should be done. How about this): - Move vchiq to /soc - Get a handle to the relevant dma controller device pointer (so 30-bit DMA controller on old RPis, 40-bit on RPi4) - Allocate slotmem/pagelist with dma_alloc_coherent(vpu_dev, ...) - Map pages with dma_map_sg(dma_dev, ...) I hope I'm not missing anything, but short of implementing it and seeing the end result, I think you'd be free of any rpi[1-3]/rpi4 distinction in the code. Regards, Nicolas
Hi Nicolas, On Tue, 5 May 2020 at 11:13, Nicolas Saenz Julienne <nsaenzjulienne@suse.de> wrote: > > On Mon, 2020-05-04 at 21:46 +0100, Phil Elwell wrote: > > Hi Nicolas, > > > > On 04/05/2020 18:40, Nicolas Saenz Julienne wrote: > > > Hi Phil, Laurent, > > > > > > On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote: > > > > From: Phil Elwell <phil@raspberrypi.org> > > > > > > > > Conditional on a new compatible string, change the pagelist encoding > > > > such that the top 24 bits are the pfn, leaving 8 bits for run length > > > > (-1). > > > > > > > > Signed-off-by: Phil Elwell <phil@raspberrypi.org> > > > > Signed-off-by: Jacopo Mondi <jacopo@jmondi.org> > > > > --- > > > > .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++----- > > > > .../interface/vchiq_arm/vchiq_arm.c | 6 ++ > > > > .../interface/vchiq_arm/vchiq_arm.h | 1 + > > > > 3 files changed, 74 insertions(+), 21 deletions(-) > > > > > > > > diff --git > > > > a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > > index 3e422a7eb3f1..ecec84ad4345 100644 > > > > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > > > > @@ -16,6 +16,8 @@ > > > > #include <soc/bcm2835/raspberrypi-firmware.h> > > > > #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) > > > > +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : > > > > (u32)(x)) > > > > +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1) > > > > > > As I commented earlier, this is a sign your dma-ranges are wrong, most of > > > the > > > code below reimplements what is already done by dma-direct (see > > > kernel/dma/direct.c). Once properly setup, you should be able to use > > > whatever > > > phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs. > > > > > > Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this > > > also applies there. > > > > As I explained in an offline email, the problem is that VCHIQ needs access to > > We discussed this privately, but I wanted to start from scratch, specially as I > hope I won't be the only one reviewing this :). > > > two > > kinds of "DMA" addresses - those suitable for the 40-bit DMA channel, and the > > 30-bit addresses that the VPU can use. Since each DT node only has access to a > > single set of DMA ranges, I can't see how to use dma-direct to calculate > > addreses > > for everything, but feel free to submit an alternative implementation showing > > how > > it should be done. > > How about this): > - Move vchiq to /soc > - Get a handle to the relevant dma controller device pointer (so 30-bit DMA > controller on old RPis, 40-bit on RPi4) > - Allocate slotmem/pagelist with dma_alloc_coherent(vpu_dev, ...) > - Map pages with dma_map_sg(dma_dev, ...) > > I hope I'm not missing anything, but short of implementing it and seeing the > end result, I think you'd be free of any rpi[1-3]/rpi4 distinction in the code. Thanks for the suggestion - I hadn't considered using a device pointer for the controller rather than the current (client) device. If that works then I propose to make the 40-bit dma property optional such that dma_dev defaults to vpu_dev. Phil
Hi Laurent, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on linus/master] [also build test WARNING on v5.7-rc4] [cannot apply to linuxtv-media/master anholt/for-next next-20200505] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982] url: https://github.com/0day-ci/linux/commits/Laurent-Pinchart/Drivers-for-the-BCM283x-CSI-2-CCP2-receiver-and-ISP/20200505-054310 base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 9851a0dee7c28514f149f7e4f60ec1b06286cc1b config: s390-allyesconfig (attached as .config) compiler: s390-linux-gcc (GCC) 9.3.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day GCC_VERSION=9.3.0 make.cross ARCH=s390 If you fix the issue, kindly add following tag as appropriate Reported-by: kbuild test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c: In function 'vchiq_prepare_bulk_data': >> drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:255:15: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 255 | bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr); | ^ vim +255 drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c 240 241 enum vchiq_status 242 vchiq_prepare_bulk_data(struct vchiq_bulk *bulk, void *offset, int size, 243 int dir) 244 { 245 struct vchiq_pagelist_info *pagelistinfo; 246 247 pagelistinfo = create_pagelist((char __user *)offset, size, 248 (dir == VCHIQ_BULK_RECEIVE) 249 ? PAGELIST_READ 250 : PAGELIST_WRITE); 251 252 if (!pagelistinfo) 253 return VCHIQ_ERROR; 254 > 255 bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr); 256 257 /* 258 * Store the pagelistinfo address in remote_data, 259 * which isn't used by the slave. 260 */ 261 bulk->remote_data = pagelistinfo; 262 263 return VCHIQ_SUCCESS; 264 } 265 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index 3e422a7eb3f1..ecec84ad4345 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -16,6 +16,8 @@ #include <soc/bcm2835/raspberrypi-firmware.h> #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : (u32)(x)) +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1) #include "vchiq_arm.h" #include "vchiq_connected.h" @@ -62,6 +64,7 @@ static void __iomem *g_regs; */ static unsigned int g_cache_line_size = 32; static struct dma_pool *g_dma_pool; +static unsigned int g_use_36bit_addrs = 0; static unsigned int g_fragments_size; static char *g_fragments_base; static char *g_free_fragments; @@ -104,6 +107,8 @@ int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state) g_cache_line_size = drvdata->cache_line_size; g_fragments_size = 2 * g_cache_line_size; + g_use_36bit_addrs = (dev->dma_pfn_offset == 0); + /* Allocate space for the channels in coherent memory */ slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE); frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS); @@ -115,14 +120,21 @@ int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state) return -ENOMEM; } + if (!IS_VC_SAFE(slot_phys)) { + dev_err(dev, "allocated DMA memory %pad is not VC-safe\n", + &slot_phys); + return -ENOMEM; + } + WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0); + channelbase = VC_SAFE(slot_phys); vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size); if (!vchiq_slot_zero) return -EINVAL; vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] = - (int)slot_phys + slot_mem_size; + channelbase + slot_mem_size; vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] = MAX_FRAGMENTS; @@ -155,7 +167,6 @@ int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state) } /* Send the base address of the slots to VideoCore */ - channelbase = slot_phys; err = rpi_firmware_property(fw, RPI_FIRMWARE_VCHIQ_INIT, &channelbase, sizeof(channelbase)); if (err || channelbase) { @@ -241,7 +252,7 @@ vchiq_prepare_bulk_data(struct vchiq_bulk *bulk, void *offset, int size, if (!pagelistinfo) return VCHIQ_ERROR; - bulk->data = (void *)(unsigned long)pagelistinfo->dma_addr; + bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr); /* * Store the pagelistinfo address in remote_data, @@ -475,25 +486,60 @@ create_pagelist(char __user *buf, size_t count, unsigned short type) /* Combine adjacent blocks for performance */ k = 0; - for_each_sg(scatterlist, sg, dma_buffers, i) { - u32 len = sg_dma_len(sg); - u32 addr = sg_dma_address(sg); + if (g_use_36bit_addrs) { + for_each_sg(scatterlist, sg, dma_buffers, i) { + u32 len = sg_dma_len(sg); + u64 addr = sg_dma_address(sg); + u32 page_id = (u32)((addr >> 4) & ~0xff); + u32 sg_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; - /* Note: addrs is the address + page_count - 1 - * The firmware expects blocks after the first to be page- - * aligned and a multiple of the page size - */ - WARN_ON(len == 0); - WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); - WARN_ON(i && (addr & ~PAGE_MASK)); - if (k > 0 && - ((addrs[k - 1] & PAGE_MASK) + - (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) - == (addr & PAGE_MASK)) - addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT); - else - addrs[k++] = (addr & PAGE_MASK) | - (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1); + /* Note: addrs is the address + page_count - 1 + * The firmware expects blocks after the first to be page- + * aligned and a multiple of the page size + */ + WARN_ON(len == 0); + WARN_ON(i && + (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); + WARN_ON(i && (addr & ~PAGE_MASK)); + WARN_ON(upper_32_bits(addr) > 0xf); + if (k > 0 && + ((addrs[k - 1] & ~0xff) + + (((addrs[k - 1] & 0xff) + 1) << 8) + == page_id)) { + u32 inc_pages = min(sg_pages, + 0xff - (addrs[k - 1] & 0xff)); + addrs[k - 1] += inc_pages; + page_id += inc_pages << 8; + sg_pages -= inc_pages; + } + while (sg_pages) { + u32 inc_pages = min(sg_pages, 0x100u); + addrs[k++] = page_id | (inc_pages - 1); + page_id += inc_pages << 8; + sg_pages -= inc_pages; + } + } + } else { + for_each_sg(scatterlist, sg, dma_buffers, i) { + u32 len = sg_dma_len(sg); + u32 addr = VC_SAFE(sg_dma_address(sg)); + u32 new_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + + /* Note: addrs is the address + page_count - 1 + * The firmware expects blocks after the first to be page- + * aligned and a multiple of the page size + */ + WARN_ON(len == 0); + WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); + WARN_ON(i && (addr & ~PAGE_MASK)); + if (k > 0 && + ((addrs[k - 1] & PAGE_MASK) + + (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) + == (addr & PAGE_MASK)) + addrs[k - 1] += new_pages; + else + addrs[k++] = (addr & PAGE_MASK) | (new_pages - 1); + } } /* Partial cache lines (fragments) require special measures */ diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index d1a556f16499..dd3c8f829daa 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -117,6 +117,11 @@ static struct vchiq_drvdata bcm2836_drvdata = { .cache_line_size = 64, }; +static struct vchiq_drvdata bcm2711_drvdata = { + .cache_line_size = 64, + .use_36bit_addrs = true, +}; + static const char *const ioctl_names[] = { "CONNECT", "SHUTDOWN", @@ -2710,6 +2715,7 @@ void vchiq_platform_conn_state_changed(struct vchiq_state *state, static const struct of_device_id vchiq_of_match[] = { { .compatible = "brcm,bcm2835-vchiq", .data = &bcm2835_drvdata }, { .compatible = "brcm,bcm2836-vchiq", .data = &bcm2836_drvdata }, + { .compatible = "brcm,bcm2711-vchiq", .data = &bcm2711_drvdata }, {}, }; MODULE_DEVICE_TABLE(of, vchiq_of_match); diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h index 0784c5002417..f8b1c005af62 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h @@ -52,6 +52,7 @@ struct vchiq_arm_state { struct vchiq_drvdata { const unsigned int cache_line_size; + const bool use_36bit_addrs; struct rpi_firmware *fw; };