diff mbox series

[03/10] soc: fujitsu: hwb: Add IOC_BB_ALLOC ioctl

Message ID 20210108105241.1757799-4-misono.tomohiro@jp.fujitsu.com (mailing list archive)
State Changes Requested
Headers show
Series Add Fujitsu A64FX soc entry/hardware barrier driver | expand

Commit Message

Misono Tomohiro Jan. 8, 2021, 10:52 a.m. UTC
IOC_BB_ALLOC ioctl initialize INIT_SYNC register which represents
PEs in a CMG joining synchronization. Although we get cpumask of
PEs from userspace, INIT_SYNC register requires mask value based
on physical PE number which is written in each PE's BST register.
So we perform conversion of cpumask value in validate_and_conver_pemask().

Since INIT_SYNC register is a shared resource per CMG, we pick
up one PE and send IPI to it to write the register.

Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
---
 drivers/soc/fujitsu/fujitsu_hwb.c      | 223 +++++++++++++++++++++++++
 include/uapi/linux/fujitsu_hpc_ioctl.h |  23 +++
 2 files changed, 246 insertions(+)
 create mode 100644 include/uapi/linux/fujitsu_hpc_ioctl.h

Comments

Arnd Bergmann Jan. 8, 2021, 1:22 p.m. UTC | #1
On Fri, Jan 8, 2021 at 11:52 AM Misono Tomohiro
<misono.tomohiro@jp.fujitsu.com> wrote:

> +static void write_init_sync_reg(void *args)
> +{
> +       struct init_sync_args *sync_args = (struct init_sync_args *)args;
> +
> +       switch (sync_args->bb) {
> +       case 0:
> +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB0_EL1);
> +               break;
> +       case 1:
> +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB1_EL1);
> +               break;
> +       case 2:
> +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB2_EL1);
> +               break;
> +       case 3:
> +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB3_EL1);
> +               break;
> +       case 4:
> +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB4_EL1);
> +               break;
> +       case 5:
> +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB5_EL1);
> +               break;
> +       }
> +}

(minor style comment

I think this could be simplified into a single write_sysreg_s() with the
register number calculated based on sync_args->bb, rather than duplicating
the same three lines six times.

> +static int ioc_bb_alloc(struct file *filp, void __user *argp)
> +{
> +       struct hwb_private_data *pdata = (struct hwb_private_data *)filp->private_data;

A slightly better way to write the ioctl command specific functions
is to just give the argument the correct type (struct
fujitsu_hwb_ioc_bb_ctl __user*)
instead of 'void __user *', to avoid the cast.

Similarly, as you don't use 'filp' itself, just pass the struct hwb_private_data
pointer as the first argument.

> @@ -164,6 +386,7 @@ static int fujitsu_hwb_dev_open(struct inode *inode, struct file *filp)
>  static const struct file_operations fujitsu_hwb_dev_fops = {
>         .owner          = THIS_MODULE,
>         .open           = fujitsu_hwb_dev_open,
> +       .unlocked_ioctl = fujitsu_hwb_dev_ioctl,
>  };

All drivers with an ioctl interface should work in compat mode as well,
so please add a

       .compat_ioctl = compat_ptr_ioctl;

> +#define __FUJITSU_IOCTL_MAGIC 'F'

It's hard to find a non-conflicting range of ioctl numbers, but
it would be good to note the conflict in

Documentation/userspace-api/ioctl/ioctl-number.rst

The 'F' range is also used in framebuffer drivers.

> +/* ioctl definitions for hardware barrier driver */
> +struct fujitsu_hwb_ioc_bb_ctl {
> +       __u8 cmg;
> +       __u8 bb;
> +       __u8 unused[2];
> +       __u32 size;
> +       unsigned long __user *pemask;
> +};

However, this structure layout is incompatible with 32-bit user
space because of the indirect pointer. See
Documentation/driver-api/ioctl.rst for how to encode a
pointer in a __u64 member.

      Arnd
Tomohiro Misono (Fujitsu) Jan. 12, 2021, 11:02 a.m. UTC | #2
> On Fri, Jan 8, 2021 at 11:52 AM Misono Tomohiro
> <misono.tomohiro@jp.fujitsu.com> wrote:
> 
> > +static void write_init_sync_reg(void *args)
> > +{
> > +       struct init_sync_args *sync_args = (struct init_sync_args *)args;
> > +
> > +       switch (sync_args->bb) {
> > +       case 0:
> > +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB0_EL1);
> > +               break;
> > +       case 1:
> > +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB1_EL1);
> > +               break;
> > +       case 2:
> > +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB2_EL1);
> > +               break;
> > +       case 3:
> > +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB3_EL1);
> > +               break;
> > +       case 4:
> > +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB4_EL1);
> > +               break;
> > +       case 5:
> > +               write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB5_EL1);
> > +               break;
> > +       }
> > +}
> 
> (minor style comment
> 
> I think this could be simplified into a single write_sysreg_s() with the
> register number calculated based on sync_args->bb, rather than duplicating
> the same three lines six times.

I think msr/mrs instructions needs register names at compile time so
it cannot be calculate dynamically. Or am I misunderstood?

> > +static int ioc_bb_alloc(struct file *filp, void __user *argp)
> > +{
> > +       struct hwb_private_data *pdata = (struct hwb_private_data *)filp->private_data;
> 
> A slightly better way to write the ioctl command specific functions
> is to just give the argument the correct type (struct
> fujitsu_hwb_ioc_bb_ctl __user*)
> instead of 'void __user *', to avoid the cast.
> 
> Similarly, as you don't use 'filp' itself, just pass the struct hwb_private_data
> pointer as the first argument.

thanks, I will follow this advise.
 
> > @@ -164,6 +386,7 @@ static int fujitsu_hwb_dev_open(struct inode *inode, struct file *filp)
> >  static const struct file_operations fujitsu_hwb_dev_fops = {
> >         .owner          = THIS_MODULE,
> >         .open           = fujitsu_hwb_dev_open,
> > +       .unlocked_ioctl = fujitsu_hwb_dev_ioctl,
> >  };
> 
> All drivers with an ioctl interface should work in compat mode as well,
> so please add a
> 
>        .compat_ioctl = compat_ptr_ioctl;


A64FX does not support 32-bit mode (aarch32 state).
So I think unlockd_ioctl is enough or is it better to use compat_ioctl anyway?

> 
> > +#define __FUJITSU_IOCTL_MAGIC 'F'
> 
> It's hard to find a non-conflicting range of ioctl numbers, but
> it would be good to note the conflict in
> 
> Documentation/userspace-api/ioctl/ioctl-number.rst
> 
> The 'F' range is also used in framebuffer drivers.

I didn't notice this, thanks for pointing out.

Again, thanks for all the reviews/comments.
Tomohiro

> > +/* ioctl definitions for hardware barrier driver */
> > +struct fujitsu_hwb_ioc_bb_ctl {
> > +       __u8 cmg;
> > +       __u8 bb;
> > +       __u8 unused[2];
> > +       __u32 size;
> > +       unsigned long __user *pemask;
> > +};
> 
> However, this structure layout is incompatible with 32-bit user
> space because of the indirect pointer. See
> Documentation/driver-api/ioctl.rst for how to encode a
> pointer in a __u64 member.
Arnd Bergmann Jan. 12, 2021, 12:34 p.m. UTC | #3
On Tue, Jan 12, 2021 at 12:02 PM misono.tomohiro@fujitsu.com
<misono.tomohiro@fujitsu.com> wrote:
> > On Fri, Jan 8, 2021 at 11:52 AM Misono Tomohiro
> > <misono.tomohiro@jp.fujitsu.com> wrote:
> >> >
> > I think this could be simplified into a single write_sysreg_s() with the
> > register number calculated based on sync_args->bb, rather than duplicating
> > the same three lines six times.
>
> I think msr/mrs instructions needs register names at compile time so
> it cannot be calculate dynamically. Or am I misunderstood?

You are correct, I didn't realize it was implemented using a string
concatenation macro and that an inline function version would be
tricky.

> > > @@ -164,6 +386,7 @@ static int fujitsu_hwb_dev_open(struct inode *inode, struct file *filp)
> > >  static const struct file_operations fujitsu_hwb_dev_fops = {
> > >         .owner          = THIS_MODULE,
> > >         .open           = fujitsu_hwb_dev_open,
> > > +       .unlocked_ioctl = fujitsu_hwb_dev_ioctl,
> > >  };
> >
> > All drivers with an ioctl interface should work in compat mode as well,
> > so please add a
> >
> >        .compat_ioctl = compat_ptr_ioctl;
>
>
> A64FX does not support 32-bit mode (aarch32 state).
> So I think unlockd_ioctl is enough or is it better to use compat_ioctl anyway?

It's a good point that this is not supported, but I would suggest adding
it anyway out of principle. It's better to always write code in a portable
way even if you do not expect to need the portability.

       Arnd
diff mbox series

Patch

diff --git a/drivers/soc/fujitsu/fujitsu_hwb.c b/drivers/soc/fujitsu/fujitsu_hwb.c
index 1dec3d3c652f..24d1bb00f55c 100644
--- a/drivers/soc/fujitsu/fujitsu_hwb.c
+++ b/drivers/soc/fujitsu/fujitsu_hwb.c
@@ -38,6 +38,8 @@ 
 #include <linux/slab.h>
 #include <linux/wait.h>
 
+#include <linux/fujitsu_hpc_ioctl.h>
+
 #ifdef pr_fmt
 #undef pr_fmt
 #endif
@@ -142,6 +144,226 @@  struct bb_info {
 };
 static struct kmem_cache *bb_info_cachep;
 
+static void free_bb_info(struct kref *kref)
+{
+	struct bb_info *bb_info = container_of(kref, struct bb_info, kref);
+
+	free_cpumask_var(bb_info->assigned_pemask);
+	free_cpumask_var(bb_info->pemask);
+	kfree(bb_info->bw);
+	kmem_cache_free(bb_info_cachep, bb_info);
+}
+
+static struct bb_info *alloc_bb_info(void)
+{
+	struct bb_info *bb_info;
+
+	bb_info = kmem_cache_zalloc(bb_info_cachep, GFP_KERNEL);
+	if (!bb_info)
+		return NULL;
+
+	bb_info->bw = kcalloc(_hwinfo.max_pe_per_cmg, sizeof(u8), GFP_KERNEL);
+	if (!bb_info->bw) {
+		free_bb_info(&bb_info->kref);
+		return NULL;
+	}
+	if (!zalloc_cpumask_var(&bb_info->pemask, GFP_KERNEL) ||
+		!zalloc_cpumask_var(&bb_info->assigned_pemask, GFP_KERNEL)) {
+		free_bb_info(&bb_info->kref);
+		return NULL;
+	}
+
+	init_waitqueue_head(&bb_info->wq);
+	kref_init(&bb_info->kref);
+
+	return bb_info;
+}
+
+static inline void put_bb_info(struct bb_info *bb_info)
+{
+	kref_put(&bb_info->kref, free_bb_info);
+}
+
+/* Validate pemask's range and convert it to a mask based on physical PE number */
+static int validate_and_convert_pemask(struct bb_info *bb_info, unsigned long *phys_pemask)
+{
+	int cpu;
+	u8 cmg;
+
+	if (cpumask_weight(bb_info->pemask) < 2) {
+		pr_err("pemask needs at least two bit set: %*pbl\n",
+						cpumask_pr_args(bb_info->pemask));
+		return -EINVAL;
+	}
+
+	if (!cpumask_subset(bb_info->pemask, cpu_online_mask)) {
+		pr_err("pemask needs to be subset of online cpu: %*pbl, %*pbl\n",
+			cpumask_pr_args(bb_info->pemask), cpumask_pr_args(cpu_online_mask));
+		return -EINVAL;
+	}
+
+	/*
+	 * INIT_SYNC register requires a mask value based on physical PE number.
+	 * So convert pemask to it while checking if all PEs belongs to the same CMG
+	 */
+	cpu = cpumask_first(bb_info->pemask);
+	cmg = _hwinfo.core_map[cpu].cmg;
+	*phys_pemask = 0;
+	for_each_cpu(cpu, bb_info->pemask) {
+		if (_hwinfo.core_map[cpu].cmg != cmg) {
+			pr_err("All PEs must belong to the same CMG: %*pbl\n",
+							cpumask_pr_args(bb_info->pemask));
+			return -EINVAL;
+		}
+		set_bit(_hwinfo.core_map[cpu].ppe, phys_pemask);
+	}
+	bb_info->cmg = cmg;
+
+	pr_debug("pemask: %*pbl, physical_pemask: %lx\n",
+					cpumask_pr_args(bb_info->pemask), *phys_pemask);
+
+	return 0;
+}
+
+/* Search free BB in_hwinfo->used_bb_bitmap[cmg] */
+static int search_free_bb(u8 cmg)
+{
+	int i;
+
+	for (i = 0; i < _hwinfo.num_bb; i++) {
+		if (!test_and_set_bit(i, &_hwinfo.used_bb_bmap[cmg])) {
+			pr_debug("Use BB %u in CMG %u, bitmap: %lx\n",
+							i, cmg, _hwinfo.used_bb_bmap[cmg]);
+			return i;
+		}
+	}
+
+	pr_err("All barrier blade is currently used in CMG %u\n", cmg);
+	return -EBUSY;
+}
+
+struct init_sync_args {
+	u64 val;
+	u8 bb;
+};
+
+static void write_init_sync_reg(void *args)
+{
+	struct init_sync_args *sync_args = (struct init_sync_args *)args;
+
+	switch (sync_args->bb) {
+	case 0:
+		write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB0_EL1);
+		break;
+	case 1:
+		write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB1_EL1);
+		break;
+	case 2:
+		write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB2_EL1);
+		break;
+	case 3:
+		write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB3_EL1);
+		break;
+	case 4:
+		write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB4_EL1);
+		break;
+	case 5:
+		write_sysreg_s(sync_args->val, FHWB_INIT_SYNC_BB5_EL1);
+		break;
+	}
+}
+
+/* Send IPI to initialize INIT_SYNC register */
+static void setup_bb(struct bb_info *bb_info, unsigned long phys_pemask)
+{
+	struct init_sync_args args = {0};
+	int cpu;
+
+	/* INIT_SYNC register is shared resource in CMG. Pick one PE to set it up */
+	cpu = cpumask_any(bb_info->pemask);
+
+	args.bb = bb_info->bb;
+	args.val = FIELD_PREP(FHWB_INIT_SYNC_BB_EL1_MASK_FIELD, phys_pemask);
+	on_each_cpu_mask(cpumask_of(cpu), write_init_sync_reg, &args, 1);
+
+	pr_debug("Setup bb. cpu: %d, CMG: %u, BB: %u, bimtap: %lx\n",
+			cpu, bb_info->cmg, bb_info->bb, _hwinfo.used_bb_bmap[bb_info->cmg]);
+}
+
+static int ioc_bb_alloc(struct file *filp, void __user *argp)
+{
+	struct hwb_private_data *pdata = (struct hwb_private_data *)filp->private_data;
+	struct fujitsu_hwb_ioc_bb_ctl bb_ctl;
+	struct bb_info *bb_info;
+	unsigned long physical_pemask;
+	unsigned int size;
+	int ret;
+
+	if (copy_from_user(&bb_ctl, (struct fujitsu_hwb_ioc_bb_ctl __user *)argp,
+						sizeof(struct fujitsu_hwb_ioc_bb_ctl)))
+		return -EFAULT;
+
+	bb_info = alloc_bb_info();
+	if (!bb_info)
+		return -ENOMEM;
+
+	/* cpumask size may vary in user and kernel space. Use the smaller one */
+	size = min(cpumask_size(), bb_ctl.size);
+	if (copy_from_user(bb_info->pemask, bb_ctl.pemask, size)) {
+		ret = -EFAULT;
+		goto put_bb_info;
+	}
+
+	ret = validate_and_convert_pemask(bb_info, &physical_pemask);
+	if (ret < 0)
+		goto put_bb_info;
+
+	ret = search_free_bb(bb_info->cmg);
+	if (ret < 0)
+		goto put_bb_info;
+	bb_info->bb = ret;
+
+	/* Copy back CMG/BB number to be used to user */
+	bb_ctl.cmg = bb_info->cmg;
+	bb_ctl.bb  = bb_info->bb;
+	if (copy_to_user((struct fujitsu_hwb_ioc_bb_ctl __user *)argp, &bb_ctl,
+						sizeof(struct fujitsu_hwb_ioc_bb_ctl))) {
+		ret = -EFAULT;
+		clear_bit(bb_ctl.bb, &_hwinfo.used_bb_bmap[bb_ctl.cmg]);
+		goto put_bb_info;
+	}
+
+	setup_bb(bb_info, physical_pemask);
+
+	spin_lock(&pdata->list_lock);
+	list_add_tail(&bb_info->node, &pdata->bb_list);
+	spin_unlock(&pdata->list_lock);
+
+	return 0;
+
+put_bb_info:
+	put_bb_info(bb_info);
+
+	return ret;
+}
+
+static long fujitsu_hwb_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	switch (cmd) {
+	case FUJITSU_HWB_IOC_BB_ALLOC:
+		ret = ioc_bb_alloc(filp, argp);
+		break;
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+
+	return ret;
+}
+
 static int fujitsu_hwb_dev_open(struct inode *inode, struct file *filp)
 {
 	struct hwb_private_data *pdata;
@@ -164,6 +386,7 @@  static int fujitsu_hwb_dev_open(struct inode *inode, struct file *filp)
 static const struct file_operations fujitsu_hwb_dev_fops = {
 	.owner          = THIS_MODULE,
 	.open           = fujitsu_hwb_dev_open,
+	.unlocked_ioctl = fujitsu_hwb_dev_ioctl,
 };
 
 static struct miscdevice bar_miscdev = {
diff --git a/include/uapi/linux/fujitsu_hpc_ioctl.h b/include/uapi/linux/fujitsu_hpc_ioctl.h
new file mode 100644
index 000000000000..c87a5bad3f59
--- /dev/null
+++ b/include/uapi/linux/fujitsu_hpc_ioctl.h
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright 2020 FUJITSU LIMITED */
+#ifndef _UAPI_LINUX_FUJITSU_HPC_IOC_H
+#define _UAPI_LINUX_FUJITSU_HPC_IOC_H
+
+#include <linux/ioctl.h>
+#include <asm/types.h>
+
+#define __FUJITSU_IOCTL_MAGIC 'F'
+
+/* ioctl definitions for hardware barrier driver */
+struct fujitsu_hwb_ioc_bb_ctl {
+	__u8 cmg;
+	__u8 bb;
+	__u8 unused[2];
+	__u32 size;
+	unsigned long __user *pemask;
+};
+
+#define FUJITSU_HWB_IOC_BB_ALLOC _IOWR(__FUJITSU_IOCTL_MAGIC, \
+	0x00, struct fujitsu_hwb_ioc_bb_ctl)
+
+#endif /* _UAPI_LINUX_FUJITSU_HPC_IOC_H */