
[v5,07/10] platform/x86/intel/ifs: Add scan test support

Message ID 20220428153849.295779-8-tony.luck@intel.com (mailing list archive)
State Deferred, archived
Series: Introduce In Field Scan driver

Commit Message

Luck, Tony April 28, 2022, 3:38 p.m. UTC
From: Jithu Joseph <jithu.joseph@intel.com>

In a core, the scan engine is shared between sibling cpus.

When a Scan test (for a particular core) is triggered by the user, worker
threads for each sibling cpu (belonging to that core) are queued to
execute the scan test function in workqueue context.

All the siblings rendezvous before the test execution. The scan
results are the same for all siblings.

A scan may be aborted for several reasons, such as when an interrupt
occurs or when the cpu does not have enough power budget for the scan. In
those cases the kernel restarts the scan from the chunk where it stopped.
The scan is also aborted when the test fails; in that case the test is
stopped immediately without retry.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/platform/x86/intel/ifs/Makefile  |   2 +-
 drivers/platform/x86/intel/ifs/core.c    |   5 +
 drivers/platform/x86/intel/ifs/ifs.h     |  48 ++++
 drivers/platform/x86/intel/ifs/runtest.c | 327 +++++++++++++++++++++++
 4 files changed, 381 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/x86/intel/ifs/runtest.c
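
The heart of the new runtest.c (full diff at the bottom of this page) is the
per-chunk retry loop described above. Condensed to its essentials (timeout
handling and the interrupt masking around the WRMSR omitted), it looks
roughly like this:

	activate.start = 0;
	activate.stop = ifsd->valid_chunks - 1;
	retries = MAX_IFS_RETRIES;

	while (activate.start <= activate.stop) {
		/* Run scan chunks start..stop; the WRMSR retires when the scan stops */
		wrmsrl(MSR_ACTIVATE_SCAN, activate.data);
		/* Where did the scan stop, and why? */
		rdmsrl(MSR_SCAN_STATUS, status.data);

		/* Test failure or a non-restartable abort: give up */
		if (!can_restart(status))
			break;

		if (status.chunk_num == activate.start) {
			/* No forward progress on this chunk */
			if (retries-- == 0)
				break;
		} else {
			retries = MAX_IFS_RETRIES;
			/* Resume from the chunk where the scan was interrupted */
			activate.start = status.chunk_num;
		}
	}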

Comments

Thomas Gleixner May 4, 2022, 12:29 p.m. UTC | #1
On Thu, Apr 28 2022 at 08:38, Tony Luck wrote:
> +static bool wait_for_siblings(struct device *dev, struct ifs_data *ifsd, atomic_t *t, long long timeout)
> +{
> +	atomic_inc(t);
> +	while (atomic_read(t) < cpu_sibl_ct) {
> +		if (timeout < SPINUNIT) {
> +			dev_err(dev,
> +				"Timeout while waiting for CPUs rendezvous, remaining: %d\n",
> +				cpu_sibl_ct - atomic_read(t));
> +			return false;
> +		}
> +
> +		ndelay(SPINUNIT);
> +		timeout -= SPINUNIT;
> +
> +		touch_nmi_watchdog();
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * When a Scan test (for a particular core) is triggered by the user, worker threads
> + * for each sibling cpus(belonging to that core) are queued to execute this function in
> + * the Workqueue (ifs_wq) context.
> + * Wait for the sibling thread to join before the execution.
> + * Execute the scan test by running wrmsr(MSR_ACTIVATE_SCAN).
> + */
> +static void ifs_work_func(struct work_struct *work)
> +{
> +	struct ifs_work *local_work = container_of(work, struct ifs_work, w);
> +	int cpu = smp_processor_id();
> +	union ifs_scan activate;
> +	union ifs_status status;
> +	unsigned long timeout;
> +	struct ifs_data *ifsd;
> +	struct device *dev;
> +	int retries;
> +	u32 first;
> +
> +	dev = local_work->dev;
> +	ifsd = ifs_get_data(dev);
> +
> +	activate.rsvd = 0;
> +	activate.delay = msec_to_tsc(THREAD_WAIT);
> +	activate.sigmce = 0;
> +
> +	/*
> +	 * Need to get (and keep) the threads on this core executing close together
> +	 * so that the writes to MSR_ACTIVATE_SCAN below will succeed in entering
> +	 * IFS test mode on this core. Interrupts on each thread are expected to be
> +	 * brief. But preemption would be a problem.
> +	 */
> +	preempt_disable();
> +
> +	/* wait for the sibling threads to join */
> +	first = cpumask_first(topology_sibling_cpumask(cpu));
> +	if (!wait_for_siblings(dev, ifsd, &siblings_in, NSEC_PER_SEC)) {

Waiting for a second with preemption disabled? Seriously?

> +		preempt_enable();
> +		dev_err(dev, "cpu %d sibling did not join rendezvous\n", cpu);
> +		goto out;
> +	}
> +
> +	activate.start = 0;
> +	activate.stop = ifsd->valid_chunks - 1;
> +	timeout = jiffies + HZ / 2;

Plus another half a second with preemption disabled. That's just insane.

> +	retries = MAX_IFS_RETRIES;
> +
> +	while (activate.start <= activate.stop) {
> +		if (time_after(jiffies, timeout)) {
> +			status.error_code = IFS_SW_TIMEOUT;
> +			break;
> +		}
> +
> +		local_irq_disable();
> +		wrmsrl(MSR_ACTIVATE_SCAN, activate.data);
> +		local_irq_enable();

That local_irq_disable() solves what?

> +		/*
> +		 * All logical CPUs on this core are now running IFS test. When it completes
> +		 * execution or is interrupted, the following RDMSR gets the scan status.
> +		 */
> +
> +		rdmsrl(MSR_SCAN_STATUS, status.data);

Wait. Is that rdmsrl() blocking execution until the scan completes?

If so, what's the stall time here? If not, how is the logic below
supposed to work?

> +		/* Some cases can be retried, give up for others */
> +		if (!can_restart(status))
> +			break;
> +
> +		if (status.chunk_num == activate.start) {
> +			/* Check for forward progress */
> +			if (retries-- == 0) {
> +				if (status.error_code == IFS_NO_ERROR)
> +					status.error_code = IFS_SW_PARTIAL_COMPLETION;
> +				break;
> +			}
> +		} else {
> +			retries = MAX_IFS_RETRIES;
> +			activate.start = status.chunk_num;
> +		}
> +	}
> +
> +	preempt_enable();
> +
> +	if (cpu == first) {
> +		/* Update status for this core */
> +		ifsd->scan_details = status.data;
> +
> +		if (status.control_error || status.signature_error) {
> +			ifsd->status = SCAN_TEST_FAIL;
> +			message_fail(dev, cpu, status);
> +		} else if (status.error_code) {
> +			ifsd->status = SCAN_NOT_TESTED;
> +			message_not_tested(dev, cpu, status);
> +		} else {
> +			ifsd->status = SCAN_TEST_PASS;
> +		}
> +	}
> +
> +	if (!wait_for_siblings(dev, ifsd, &siblings_out, NSEC_PER_SEC))
> +		dev_err(dev, "cpu %d sibling did not exit rendezvous\n", cpu);
> +
> +out:
> +	if (cpu == first)
> +		complete(&test_thread_done);
> +}
> +
> +/*
> + * Initiate per core test. It wakes up work queue threads on the target cpu and
> + * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and
> + * wait for all sibling threads to finish the scan test.
> + */
> +int do_core_test(int cpu, struct device *dev)
> +{
> +	struct ifs_work *local_work;
> +	int sibling;
> +	int ret = 0;
> +	int i = 0;
> +
> +	if (!scan_enabled)
> +		return -ENXIO;
> +
> +	cpu_hotplug_disable();

Why cpu_hotplug_disable()? Why is cpus_read_lock() not sufficient here?

> +	if (!cpu_online(cpu)) {
> +		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	reinit_completion(&test_thread_done);
> +	atomic_set(&siblings_in, 0);
> +	atomic_set(&siblings_out, 0);
> +
> +	cpu_sibl_ct = cpumask_weight(topology_sibling_cpumask(cpu));
> +	local_work = kcalloc(cpu_sibl_ct, sizeof(*local_work), GFP_NOWAIT);

Why does this need GFP_NOWAIT?

> +int ifs_setup_wq(void)
> +{
> +	/* Flags are to keep all the sibling cpu worker threads (of a core) in close sync */

I put that into the wishful thinking realm.

Is there anywhere a proper specification of this mechanism? The publicly
available MSR list in the SDM is useless.

Without proper documentation it's pretty much impossible to review this
code and to think about the approach.

Thanks,

        tglx
Luck, Tony May 4, 2022, 6:52 p.m. UTC | #2
On Wed, May 04, 2022 at 02:29:33PM +0200, Thomas Gleixner wrote:
> On Thu, Apr 28 2022 at 08:38, Tony Luck wrote:
> > +
> > +	/* wait for the sibling threads to join */
> > +	first = cpumask_first(topology_sibling_cpumask(cpu));
> > +	if (!wait_for_siblings(dev, ifsd, &siblings_in, NSEC_PER_SEC)) {
> 
> Waiting for a second with preemption disabled? Seriously?

Probably won't ever wait for a second. Any suggestions for a reasonable
timeout for how long it might take before both threads on a core begin
executing the target code after a pair of:

	queue_work_on(sibling, ifs_wq, &local_work[i].w);

that the request to check this core fired off?

Possibly this could be dropped, and just built into the allowable delay
for the threads to execute the ACTIVATE_SCAN (31 bits of TSC cycles in
the value written to the MSR). But a future scan feature doesn't include
that user tuneable value.
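
(For scale, taking a 2 GHz TSC purely as an illustrative assumption: the
31-bit delay field caps out at 2^31 - 1 ~= 2.1e9 cycles, i.e. roughly one
second, while the value the driver currently programs,
msec_to_tsc(THREAD_WAIT) with THREAD_WAIT = 5 ms, is only about 10^7 cycles.)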

> 
> > +		preempt_enable();
> > +		dev_err(dev, "cpu %d sibling did not join rendezvous\n", cpu);
> > +		goto out;
> > +	}
> > +
> > +	activate.start = 0;
> > +	activate.stop = ifsd->valid_chunks - 1;
> > +	timeout = jiffies + HZ / 2;
> 
> Plus another half a second with preemption disabled. That's just insane.

Another rounded up value. Experimentally we are seeing the core scan
test take around 50ms. The spec (I know, we haven't published the spec)
says "up to 200ms".

> 
> > +	retries = MAX_IFS_RETRIES;
> > +
> > +	while (activate.start <= activate.stop) {
> > +		if (time_after(jiffies, timeout)) {
> > +			status.error_code = IFS_SW_TIMEOUT;
> > +			break;
> > +		}
> > +
> > +		local_irq_disable();
> > +		wrmsrl(MSR_ACTIVATE_SCAN, activate.data);
> > +		local_irq_enable();
> 
> That local_irq_disable() solves what?

An interrupt will stop the currently running "chunk" of the scan.
It is a restartable case. But the concern is that with high rate of
interrupts the scan may not complete (or even make any forward
progress).

> 
> > +		/*
> > +		 * All logical CPUs on this core are now running IFS test. When it completes
> > +		 * execution or is interrupted, the following RDMSR gets the scan status.
> > +		 */
> > +
> > +		rdmsrl(MSR_SCAN_STATUS, status.data);
> 
> Wait. Is that rdmsrl() blocking execution until the scan completes?

The comment isn't quite accurate here (my fault). The WRMSR doesn't
retire until the scan stops (either because it completed, or because
something happened to stop it before all chunks were processed).

So the two HT threads will continue after the WRMSR pretty much in
lockstep and do the RDMSR to get the status. Both will see the same
status in most cases.

> 
> If so, what's the stall time here? If not, how is the logic below
> supposed to work?

Exact time will depend how many chunks of the scan were completed, and
how long they took. I see 50 ms total on current test system.

> 
> > +		/* Some cases can be retried, give up for others */
> > +		if (!can_restart(status))
> > +			break;
> > +
> > +		if (status.chunk_num == activate.start) {
> > +			/* Check for forward progress */
> > +			if (retries-- == 0) {
> > +				if (status.error_code == IFS_NO_ERROR)
> > +					status.error_code = IFS_SW_PARTIAL_COMPLETION;
> > +				break;
> > +			}
> > +		} else {
> > +			retries = MAX_IFS_RETRIES;
> > +			activate.start = status.chunk_num;
> > +		}
> > +	}
> > +


> > +int do_core_test(int cpu, struct device *dev)
> > +{
> > +	struct ifs_work *local_work;
> > +	int sibling;
> > +	int ret = 0;
> > +	int i = 0;
> > +
> > +	if (!scan_enabled)
> > +		return -ENXIO;
> > +
> > +	cpu_hotplug_disable();
> 
> Why cpu_hotplug_disable()? Why is cpus_read_lock() not sufficient here?

May be left from earlier version. I'll check that cpus_read_lock() is
enough.

> 
> > +	if (!cpu_online(cpu)) {
> > +		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
> > +		ret = -EINVAL;
> > +		goto out;
> > +	}
> > +
> > +	reinit_completion(&test_thread_done);
> > +	atomic_set(&siblings_in, 0);
> > +	atomic_set(&siblings_out, 0);
> > +
> > +	cpu_sibl_ct = cpumask_weight(topology_sibling_cpumask(cpu));
> > +	local_work = kcalloc(cpu_sibl_ct, sizeof(*local_work), GFP_NOWAIT);
> 
> Why does this need GFP_NOWAIT?

It doesn't. Will fix.

> 
> > +int ifs_setup_wq(void)
> > +{
> > +	/* Flags are to keep all the sibling cpu worker threads (of a core) in close sync */
> 
> I put that into the wishful thinking realm.

Can change to "... to try to keep ..."
> 
> Is there anywhere a proper specification of this mechanism? The publicly
> available MSR list in the SDM is useless.
> 
> Without proper documentation it's pretty much impossible to review this
> code and to think about the approach.

Step 1 (at boot or driver load) is loading the scan tests into BIOS
reserved memory. I will fix up the bits where you pointed out problems
there.

Step 2 is the run time test of each core. That requires the near
simultaneous execution of:

	wrmsrl(MSR_ACTIVATE_SCAN, activate.data);

on all HT threads on the core. Trivial on parts that do not support
HT, or where it is disabled in BIOS. The above code is trying to
achieve this "parallel" execution.
The follow-on :

	rdmsrl(MSR_SCAN_STATUS, status.data);

doesn't have to be synchronized ... but handy to do so for when not
all chunks were processed and need to loop back to run another
activate_scan to continue starting from the interrupted chunk. In
the lab, this seems common ... when scanning all cores many of them
complete all chunks in a single bite, but several take 2-3 times around
the loop before completing.

As noted above I'm seeing a core test take around 50ms (but spec says
up to 200ms). In some environments that doesn't require any special
application or system reconfiguration.  It's not much different from
time-slicing when you are running more processes (or guests) than you
have CPUs. So sysadmins in those environments can use this driver to
cycle through cores testing each in turn without any extra steps.

You've pointed out that the driver disables preemption for insanely
long amounts of time ... to use this driver to test cores on a system
running applications where that is an issue will require additional steps
to migrate latency critical applications to a different core while the
test is in progress, also re-direct interrupts. That seems well beyond the
scope of what is possible in a driver without all the information about
what workloads are running to pick a new temporary home for processes
and interrupts while the core is being tested.

-Tony
Thomas Gleixner May 4, 2022, 11:15 p.m. UTC | #3
On Wed, May 04 2022 at 11:52, Luck, Tony wrote:
> On Wed, May 04, 2022 at 02:29:33PM +0200, Thomas Gleixner wrote:
>> On Thu, Apr 28 2022 at 08:38, Tony Luck wrote:
>> > +
>> > +	/* wait for the sibling threads to join */
>> > +	first = cpumask_first(topology_sibling_cpumask(cpu));
>> > +	if (!wait_for_siblings(dev, ifsd, &siblings_in, NSEC_PER_SEC)) {
>> 
>> Waiting for a second with preemption disabled? Seriously?
>
> Probably won't ever wait for a second. Any suggestions for a reasonable
> timeout for how long it might take before both threads on a core begin
> executing the target code after a pair of:
>
> 	queue_work_on(sibling, ifs_wq, &local_work[i].w);
>
> that the request to check this core fired off?

The real question is why you try to rendezvous CPUs via work queues.

The kernel has a well established mechanism to do CPU rendezvous already:

    stomp_machine()

We all hate it with a passion, but it is already doing what you are
trying to achieve and as the stopper threads run with high priority they
are not subject to arbitrary scheduling delays which make one CPU wait
for a long time with preemption disabled.

>> Plus another half a second with preemption disabled. That's just insane.
>
> Another rounded up value. Experimentally we are seeing the core scan
> test take around 50ms. The spec (I know, we haven't published the spec)
> says "up to 200ms".

That's daft. Both the 200ms and the non-published spec, though the latter
is worse because it's wasting everyone's time.

>> > +	retries = MAX_IFS_RETRIES;
>> > +
>> > +	while (activate.start <= activate.stop) {
>> > +		if (time_after(jiffies, timeout)) {
>> > +			status.error_code = IFS_SW_TIMEOUT;
>> > +			break;
>> > +		}
>> > +
>> > +		local_irq_disable();
>> > +		wrmsrl(MSR_ACTIVATE_SCAN, activate.data);
>> > +		local_irq_enable();
>> 
>> That local_irq_disable() solves what?
>
> An interrupt will stop the currently running "chunk" of the scan.
> It is a restartable case. But the concern is that with high rate of
> interrupts the scan may not complete (or even make any forward
> progress).

What about NMI/MCE? What happens if the scan triggers an MCE?

If the scan is stopped, will it be stopped on both hyperthreads?

What happens in the case when one of the CPUs is slightly behind the
other:

     CPU A                      CPU B
     local_irq_disable()
     wrmsrl(...);
                                <- Interrupt
                                   handle_irq();
                                local_irq_disable();
                                wrmsrl(...);

Will the interrupt which hit CPU B _after_ CPU A issued the MSR write
stop the operation on CPU A and make it return?

If not, then how long is CPU A waiting for CPU B to join the party?

>> > +		/*
>> > +		 * All logical CPUs on this core are now running IFS test. When it completes
>> > +		 * execution or is interrupted, the following RDMSR gets the scan status.
>> > +		 */
>> > +
>> > +		rdmsrl(MSR_SCAN_STATUS, status.data);
>> 
>> Wait. Is that rdmsrl() blocking execution until the scan completes?
>
> The comment isn't quite accurate here (my fault). The WRMSR doesn't
> retire until the scan stops (either because it completed, or because
> something happened to stop it before all chunks were processed).

I suspected that due to the non-commented local_irq_disable() ...

>> If so, what's the stall time here? If not, how is the logic below
>> supposed to work?
>
> Exact time will depend how many chunks of the scan were completed, and
> how long they took. I see 50 ms total on current test system.

Per chunk or for all chunks? The interesting part is not the total
time, the interesting part is the time per chunk.

>> Is there anywhere a proper specification of this mechanism? The publicly
>> available MSR list in the SDM is useless.
>> 
>> Without proper documentation it's pretty much impossible to review this
>> code and to think about the approach.

...

> Step 2 is the run time test of each core. That requires the near
> simultaneous execution of:
>
> 	wrmsrl(MSR_ACTIVATE_SCAN, activate.data);
>
> on all HT threads on the core. Trivial on parts that do not support
> HT, or where it is disabled in BIOS. The above code is trying to
> achieve this "parallel" execution.

How is that supposed to work on a system which has HT enabled in BIOS,
but disabled on the kernel command line or via /sys/..../smt/control or
when a HT sibling is offlined temporarily?

I assume it cannot work, but I can't see anything which handles those
cases.

> The follow-on :
>
> 	rdmsrl(MSR_SCAN_STATUS, status.data);
>
> doesn't have to be synchronized ... but handy to do so for when not
> all chunks were processed and need to loop back to run another
> activate_scan to continue starting from the interrupted chunk. In
> the lab, this seems common ... when scanning all cores many of them
> complete all chunks in a single bite, but several take 2-3 times around
> the loop before completing.

Is there a timeout for restarting an interrupted chunk?

> As noted above I'm seeing a core test take around 50ms (but spec says
> up to 200ms). In some environments that doesn't require any special
> application or system reconfiguration.  It's not much different from
> time-slicing when you are running more processes (or guests) than you
> have CPUs. So sysadmins in those environments can use this driver to
> cycle through cores testing each in turn without any extra steps.
>
> You've pointed out that the driver disables preemption for insanely
> long amounts of time ... to use this driver to test cores on a system
> running applications where that is an issue will require additional steps
> to migrate latency critical applications to a different core while the
> test is in progress, also re-direct interrupts. That seems well beyond the
> scope of what is possible in a driver without all the information about
> what workloads are running to pick a new temporary home for processes
> and interrupts while the core is being tested.

I assume that's all described in Documentation/x86/intel-ifs.rst, which
was in patch 11/10 and unfortunately got lost in the intertubes.

Coming back to that rendezvous mechanism.

As far as I understand it, but of course my implementation of

   # pdforacle --remote=tony --condense sekrit-ifs-spec.pdf

might be suboptimal, the only hard requirement is to start the
scan for a particular chunk on all HT threads roughly at the same
time.

But there is no hard requirement that the individual chunks are started
right after each other, or that a restart of a chunk that was interrupted
for whatever reason happens 'immediately'.

If that's the case and anything else would be an insanity, then you can
do something like this:

static DEFINE_PER_CPU(struct ifs_status, ifs_status);

int do_test(int cpu, struct device *dev)
{
        const struct cpumask *mask;
        struct ifsdata data;

        cpus_read_lock();
        mask = topology_sibling_cpumask(cpu);
        if (!sane(mask))
        	goto fail;

        for_each_cpu(sibling, mask)
        	init_ifs_status(sibling);

        init_data(data, dev);

        while (data.chunk < data.max_chunks) {
        	ret = stomp_cpumask(mask, doscan, data);

                if (!ret) {
                        data.chunk++;
                	continue;
                }

                // Analyze per CPU ifs_status to either
                // restart or abort with proper information
                // about the reason to abort
                .....
        }
        ....

We don't have stomp_cpumask() today, but that's trivial enough to
implement. Yes, we want to avoid expanding stomp_machine(), but trying
to resemble stomp_machine() with work queues is worse by several orders
of magnitude.

doscan() will boil down to:

    wrmsrl(RUN_SCAM, data->chunk);
    rdmsrl(STA_SCAM, status);
    this_cpu_write(ifs_status, status);
    return status ? -ECRAP : 0;

plus the required commentry which will be an order of magnitude more
lines than the actual code above.

Thanks,

        tglx
Peter Zijlstra May 5, 2022, 8:28 a.m. UTC | #4
On Thu, May 05, 2022 at 01:15:07AM +0200, Thomas Gleixner wrote:
> We don't have stomp_cpumask() today, but that's trivial enough to
> implement.

I don't think we want to gift people a random cpumask stop_machine(),
but here's one that stops a core. It runs the @fn on every cpu since I
thought to have understood that was the requirement for this muck.

*completely* untestededed.

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 6da7b91af353..2e7324e44e38 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -631,6 +631,34 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 }
 EXPORT_SYMBOL_GPL(stop_machine);
 
+/*
+ * stop_core_cpuslocked - stop_machine a core
+ * @cpu: any cpu in the targeted core
+ * @fn: the function to run
+ * @data: the data ptr for @fn()
+ *
+ * RETURNS:
+ * 0 if all executions of @fn returned 0, any non zero return value if any
+ * returned non zero.
+ */
+int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data)
+{
+	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+
+	struct multi_stop_data msdata = {
+		.fn = fn,
+		.data = data,
+		.num_threads = cpumask_weight(smt_mask);
+		.active_cpus = smt_mask,
+	};
+
+	lockdep_assert_cpus_held();
+
+	/* Set the initial state and stop all online cpus. */
+	set_state(&msdata, MULTI_STOP_PREPARE);
+	return stop_cpus(smt_mask, multi_cpu_stop, &msdata);
+}
+
 /**
  * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
  * @fn: the function to run
Thomas Gleixner May 5, 2022, 9:01 a.m. UTC | #5
On Thu, May 05 2022 at 10:28, Peter Zijlstra wrote:
> On Thu, May 05, 2022 at 01:15:07AM +0200, Thomas Gleixner wrote:
>> We don't have stomp_cpumask() today, but that's trivial enough to
>> implement.
>
> I don't think we want to gift people a random cpumask stop_machine(),

Fair enough.

> but here's one that stops a core. It runs the @fn on every cpu since I
> thought to have understood that was the requirement for this muck.

Yes.

> *completely* untestededed.

Looks about right neverthelessesseess.
Luck, Tony May 5, 2022, 6:32 p.m. UTC | #6
On Thu, May 05, 2022 at 11:01:27AM +0200, Thomas Gleixner wrote:
> On Thu, May 05 2022 at 10:28, Peter Zijlstra wrote:
> > On Thu, May 05, 2022 at 01:15:07AM +0200, Thomas Gleixner wrote:
> >> We don't have stomp_cpumask() today, but that's trivial enough to
> >> implement.
> >
> > I don't think we want to gift people a random cpumask stop_machine(),
> 
> Fair enough.
> 
> > but here's one that stops a core. It runs the @fn on every cpu since I
> > thought to have understood that was the requirement for this muck.
> 
> Yes.
> 
> > *completely* untestededed.
> 
> Looks about right neverthelessesseess.

Close enough. I made these changes:

1) Added EXPORT_SYMBOL_GPL()
2) Added prototype in <linux/stop_machine.h>
3) Moved the kerneldoc comment to the header (for some reason
   the other stop* functions are documented there).
4) Edited that kerneldoc a bit <<<< NEEDS REVIEW  >>>
5) Changed a ';' to a ',' to make it compile.

With that it works, and will do exactly what I need (with less code
in the IFS driver by the look of things).

A couple of thousand tests shows it works. The two threads
arrive within 20 TSC cycles of each other 60% of the time,
but I have some outliers up to 654 cycles ... which is plenty
close enough.
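
To give a sense of the driver side with the new helper (a rough sketch only,
not the final driver code: doscan(), the per-cpu ifs_status variable and the
-EIO return value are placeholders), the rendezvous collapses to something
like:

	static DEFINE_PER_CPU(union ifs_status, ifs_status);

	/* Runs on every thread of the core, in stopper context */
	static int doscan(void *data)
	{
		union ifs_scan *activate = data;
		union ifs_status status;

		/* WRMSR does not retire until the scan stops or is interrupted */
		wrmsrl(MSR_ACTIVATE_SCAN, activate->data);
		rdmsrl(MSR_SCAN_STATUS, status.data);

		this_cpu_write(ifs_status, status);
		return status.error_code ? -EIO : 0;
	}

	/*
	 * Caller holds cpus_read_lock() and loops over chunks, inspecting the
	 * per-cpu ifs_status after each round to decide whether to restart or
	 * abort.
	 */
	ret = stop_core_cpuslocked(cpu, doscan, &activate);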

Patch now looks like this. Author credit to Peter ... are you willing
to add a Signed-off-by to stop checkpatch from whining at me?

There isn't a "Something-similar-suggested-by:" tag to credit Thomas
with this.  Perhaps "Inspired-by:"?

-Tony


From df5ca8024997d3d782978d154cfbff5147f451ad Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 5 May 2022 08:55:09 -0700
Subject: [PATCH] stop_machine: Add stop_core_cpuslocked() for per-core
 operations

Hardware core level testing features require near simultaneous execution
of WRMSR instructions on all threads of a core to initiate a test.

Provide a customized cut down version of stop_machine_cpuslocked() that
just operates on the threads of a single core.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/stop_machine.h | 16 ++++++++++++++++
 kernel/stop_machine.c        | 19 +++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 46fb3ebdd16e..ea7a74ea7389 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -124,6 +124,22 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
  */
 int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 
+/**
+ * stop_core_cpuslocked: - stop all threads on just one core
+ * @cpu: any cpu in the targeted core
+ * @fn: the function to run
+ * @data: the data ptr for @fn()
+ *
+ * Same as above, but instead of every CPU, only the logical CPUs of a
+ * single core are affected.
+ *
+ * Context: Must be called from within a cpus_read_lock() protected region.
+ *
+ * Return: 0 if all executions of @fn returned 0, any non zero return
+ * value if any returned non zero.
+ */
+int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data);
+
 int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				   const struct cpumask *cpus);
 #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index cbc30271ea4d..579761729836 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -633,6 +633,25 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 }
 EXPORT_SYMBOL_GPL(stop_machine);
 
+int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data)
+{
+	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+
+	struct multi_stop_data msdata = {
+		.fn = fn,
+		.data = data,
+		.num_threads = cpumask_weight(smt_mask),
+		.active_cpus = smt_mask,
+	};
+
+	lockdep_assert_cpus_held();
+
+	/* Set the initial state and stop all online cpus. */
+	set_state(&msdata, MULTI_STOP_PREPARE);
+	return stop_cpus(smt_mask, multi_cpu_stop, &msdata);
+}
+EXPORT_SYMBOL_GPL(stop_core_cpuslocked);
+
 /**
  * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
  * @fn: the function to run
Peter Zijlstra May 5, 2022, 8:21 p.m. UTC | #7
On Thu, May 05, 2022 at 11:32:04AM -0700, Luck, Tony wrote:

> Patch now looks like this. Author credit to Peter ... are you willing
> to add a Signed-off-by to stop checkpatch from whining at me?

sure, see below.

> There isn't a "Something-similar-suggested-by:" tag to credit Thomas
> with this.  Perhaps "Inspired-by:"?

I'm all for creative one off tags, there's some marvelous ones in the
tree, but we always need more :-)

> -Tony
> 
> 
> From df5ca8024997d3d782978d154cfbff5147f451ad Mon Sep 17 00:00:00 2001
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Thu, 5 May 2022 08:55:09 -0700
> Subject: [PATCH] stop_machine: Add stop_core_cpuslocked() for per-core
>  operations
> 
> Hardware core level testing features require near simultaneous execution
> of WRMSR instructions on all threads of a core to initiate a test.
> 
> Provide a customized cut down version of stop_machine_cpuslocked() that
> just operates on the threads of a single core.
> 
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  include/linux/stop_machine.h | 16 ++++++++++++++++
>  kernel/stop_machine.c        | 19 +++++++++++++++++++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
> index 46fb3ebdd16e..ea7a74ea7389 100644
> --- a/include/linux/stop_machine.h
> +++ b/include/linux/stop_machine.h
> @@ -124,6 +124,22 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
>   */
>  int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
>  
> +/**
> + * stop_core_cpuslocked: - stop all threads on just one core
> + * @cpu: any cpu in the targeted core
> + * @fn: the function to run
> + * @data: the data ptr for @fn()
> + *
> + * Same as above, but instead of every CPU, only the logical CPUs of a
> + * single core are affected.
> + *
> + * Context: Must be called from within a cpus_read_lock() protected region.
> + *
> + * Return: 0 if all executions of @fn returned 0, any non zero return
> + * value if any returned non zero.
> + */
> +int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data);
> +
>  int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
>  				   const struct cpumask *cpus);
>  #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index cbc30271ea4d..579761729836 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -633,6 +633,25 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
>  }
>  EXPORT_SYMBOL_GPL(stop_machine);
>  
> +int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data)
> +{
> +	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
> +
> +	struct multi_stop_data msdata = {
> +		.fn = fn,
> +		.data = data,
> +		.num_threads = cpumask_weight(smt_mask),
> +		.active_cpus = smt_mask,
> +	};
> +
> +	lockdep_assert_cpus_held();
> +
> +	/* Set the initial state and stop all online cpus. */
> +	set_state(&msdata, MULTI_STOP_PREPARE);
> +	return stop_cpus(smt_mask, multi_cpu_stop, &msdata);
> +}
> +EXPORT_SYMBOL_GPL(stop_core_cpuslocked);
> +
>  /**
>   * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
>   * @fn: the function to run
> -- 
> 2.35.1
>

Patch

diff --git a/drivers/platform/x86/intel/ifs/Makefile b/drivers/platform/x86/intel/ifs/Makefile
index 98b6fde15689..cedcb103f860 100644
--- a/drivers/platform/x86/intel/ifs/Makefile
+++ b/drivers/platform/x86/intel/ifs/Makefile
@@ -1,3 +1,3 @@ 
 obj-$(CONFIG_INTEL_IFS)		+= intel_ifs.o
 
-intel_ifs-objs			:= core.o load.o
+intel_ifs-objs			:= core.o load.o runtest.o
diff --git a/drivers/platform/x86/intel/ifs/core.c b/drivers/platform/x86/intel/ifs/core.c
index d4a54ff47447..d408290480c7 100644
--- a/drivers/platform/x86/intel/ifs/core.c
+++ b/drivers/platform/x86/intel/ifs/core.c
@@ -47,10 +47,14 @@  static int __init ifs_init(void)
 	if (rdmsrl_safe(MSR_INTEGRITY_CAPS, &msrval))
 		return -ENODEV;
 
+	if (ifs_setup_wq())
+		return -ENOMEM;
+
 	if ((msrval & BIT(ifs_device.data.integrity_cap_bit)) &&
 	    !misc_register(&ifs_device.misc)) {
 		ifs_load_firmware(ifs_device.misc.this_device);
 	} else {
+		ifs_destroy_wq();
 		return -ENODEV;
 	}
 
@@ -60,6 +64,7 @@  static int __init ifs_init(void)
 static void __exit ifs_exit(void)
 {
 	misc_deregister(&ifs_device.misc);
+	ifs_destroy_wq();
 }
 
 module_init(ifs_init);
diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h
index d985b4a50f46..31e58a36ab75 100644
--- a/drivers/platform/x86/intel/ifs/ifs.h
+++ b/drivers/platform/x86/intel/ifs/ifs.h
@@ -11,6 +11,13 @@ 
 #define MSR_SCAN_HASHES_STATUS			0x000002c3
 #define MSR_AUTHENTICATE_AND_COPY_CHUNK		0x000002c4
 #define MSR_CHUNKS_AUTHENTICATION_STATUS	0x000002c5
+#define MSR_ACTIVATE_SCAN			0x000002c6
+#define MSR_SCAN_STATUS				0x000002c7
+#define SCAN_NOT_TESTED				0
+#define SCAN_TEST_PASS				1
+#define SCAN_TEST_FAIL				2
+#define SPINUNIT				100
+#define THREAD_WAIT				5
 
 /* MSR_SCAN_HASHES_STATUS bit fields */
 union ifs_scan_hashes_status {
@@ -38,6 +45,40 @@  union ifs_chunks_auth_status {
 	};
 };
 
+/* MSR_ACTIVATE_SCAN bit fields */
+union ifs_scan {
+	u64	data;
+	struct {
+		u32	start	:8;
+		u32	stop	:8;
+		u32	rsvd	:16;
+		u32	delay	:31;
+		u32	sigmce	:1;
+	};
+};
+
+/* MSR_SCAN_STATUS bit fields */
+union ifs_status {
+	u64	data;
+	struct {
+		u32	chunk_num		:8;
+		u32	chunk_stop_index	:8;
+		u32	rsvd1			:16;
+		u32	error_code		:8;
+		u32	rsvd2			:22;
+		u32	control_error		:1;
+		u32	signature_error		:1;
+	};
+};
+
+/*
+ * Driver populated error-codes
+ * 0xFD: Test timed out before completing all the chunks.
+ * 0xFE: not all scan chunks were executed. Maximum forward progress retries exceeded.
+ */
+#define IFS_SW_TIMEOUT				0xFD
+#define IFS_SW_PARTIAL_COMPLETION		0xFE
+
 /**
  * struct ifs_data - attributes related to intel IFS driver
  * @integrity_cap_bit - MSR_INTEGRITY_CAPS bit enumerating this test
@@ -45,6 +86,8 @@  union ifs_chunks_auth_status {
  * @loaded: If a valid test binary has been loaded into the memory
  * @loading_error: Error occurred on another CPU while loading image
  * @valid_chunks: number of chunks which could be validated.
+ * @status: it holds simple status pass/fail/untested
+ * @scan_details: opaque scan status code from h/w
  */
 struct ifs_data {
 	int integrity_cap_bit;
@@ -52,6 +95,8 @@  struct ifs_data {
 	bool loaded;
 	bool loading_error;
 	int valid_chunks;
+	int status;
+	u64 scan_details;
 };
 
 struct ifs_device {
@@ -68,5 +113,8 @@  static inline struct ifs_data *ifs_get_data(struct device *dev)
 }
 
 void ifs_load_firmware(struct device *dev);
+int ifs_setup_wq(void);
+void ifs_destroy_wq(void);
+int do_core_test(int cpu, struct device *dev);
 
 #endif
diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c
new file mode 100644
index 000000000000..c30cc9c95b4f
--- /dev/null
+++ b/drivers/platform/x86/intel/ifs/runtest.c
@@ -0,0 +1,327 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/nmi.h>
+#include <linux/slab.h>
+
+#include "ifs.h"
+
+/*
+ * Note all code and data in this file is protected by
+ * ifs_sem. On HT systems all threads on a core will
+ * execute together, but only the first thread on the
+ * core will update results of the test and indicate
+ * completion.
+ */
+static struct workqueue_struct *ifs_wq;
+static struct completion test_thread_done;
+static atomic_t siblings_in;
+static atomic_t siblings_out;
+static int cpu_sibl_ct;
+static bool scan_enabled = true;
+
+struct ifs_work {
+	struct work_struct w;
+	struct device *dev;
+};
+
+/* Max retries on the same chunk */
+#define MAX_IFS_RETRIES  5
+
+static unsigned long msec_to_tsc(unsigned long msec)
+{
+	return tsc_khz * 1000 * msec / MSEC_PER_SEC;
+}
+
+enum ifs_status_err_code {
+	IFS_NO_ERROR				= 0,
+	IFS_OTHER_THREAD_COULD_NOT_JOIN		= 1,
+	IFS_INTERRUPTED_BEFORE_RENDEZVOUS	= 2,
+	IFS_POWER_MGMT_INADEQUATE_FOR_SCAN	= 3,
+	IFS_INVALID_CHUNK_RANGE			= 4,
+	IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS	= 5,
+	IFS_CORE_NOT_CAPABLE_CURRENTLY		= 6,
+	IFS_UNASSIGNED_ERROR_CODE		= 7,
+	IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 8,
+	IFS_INTERRUPTED_DURING_EXECUTION	= 9,
+};
+
+static const char * const scan_test_status[] = {
+	[IFS_NO_ERROR] = "SCAN no error",
+	[IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
+	[IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.",
+	[IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] =
+	"Core Abort SCAN Response due to power management condition.",
+	[IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range",
+	[IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
+	[IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently",
+	[IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7",
+	[IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] =
+	"Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
+	[IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start",
+};
+
+static void message_not_tested(struct device *dev, int cpu, union ifs_status status)
+{
+	if (status.error_code < ARRAY_SIZE(scan_test_status))
+		dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
+			 cpumask_pr_args(topology_sibling_cpumask(cpu)),
+			 scan_test_status[status.error_code]);
+	else if (status.error_code == IFS_SW_TIMEOUT)
+		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
+			 cpumask_pr_args(topology_sibling_cpumask(cpu)));
+	else if (status.error_code == IFS_SW_PARTIAL_COMPLETION)
+		dev_info(dev, "CPU(s) %*pbl: %s\n",
+			 cpumask_pr_args(topology_sibling_cpumask(cpu)),
+			 "Not all scan chunks were executed. Maximum forward progress retries exceeded");
+	else
+		dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
+			 cpumask_pr_args(topology_sibling_cpumask(cpu)), status.data);
+}
+
+static void message_fail(struct device *dev, int cpu, union ifs_status status)
+{
+	/*
+	 * control_error is set when the microcode runs into a problem
+	 * loading the image from the reserved BIOS memory, or it has
+	 * been corrupted. Reloading the image may fix this issue.
+	 */
+	if (status.control_error) {
+		dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image\n",
+			cpumask_pr_args(topology_sibling_cpumask(cpu)));
+	}
+
+	/*
+	 * signature_error is set when the output from the scan chains does not
+	 * match the expected signature. This might be a transient problem (e.g.
+	 * due to a bit flip from an alpha particle or neutron). If the problem
+	 * repeats on a subsequent test, then it indicates an actual problem in
+	 * the core being tested.
+	 */
+	if (status.signature_error) {
+		dev_err(dev, "CPU(s) %*pbl: test signature incorrect.\n",
+			cpumask_pr_args(topology_sibling_cpumask(cpu)));
+	}
+}
+
+static bool can_restart(union ifs_status status)
+{
+	enum ifs_status_err_code err_code = status.error_code;
+
+	/* Signature for chunk is bad, or scan test failed */
+	if (status.signature_error || status.control_error)
+		return false;
+
+	switch (err_code) {
+	case IFS_NO_ERROR:
+	case IFS_OTHER_THREAD_COULD_NOT_JOIN:
+	case IFS_INTERRUPTED_BEFORE_RENDEZVOUS:
+	case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN:
+	case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
+	case IFS_INTERRUPTED_DURING_EXECUTION:
+		return true;
+	case IFS_INVALID_CHUNK_RANGE:
+	case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS:
+	case IFS_CORE_NOT_CAPABLE_CURRENTLY:
+	case IFS_UNASSIGNED_ERROR_CODE:
+		break;
+	}
+	return false;
+}
+
+static bool wait_for_siblings(struct device *dev, struct ifs_data *ifsd, atomic_t *t, long long timeout)
+{
+	atomic_inc(t);
+	while (atomic_read(t) < cpu_sibl_ct) {
+		if (timeout < SPINUNIT) {
+			dev_err(dev,
+				"Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+				cpu_sibl_ct - atomic_read(t));
+			return false;
+		}
+
+		ndelay(SPINUNIT);
+		timeout -= SPINUNIT;
+
+		touch_nmi_watchdog();
+	}
+
+	return true;
+}
+
+/*
+ * When a Scan test (for a particular core) is triggered by the user, worker threads
+ * for each sibling cpus(belonging to that core) are queued to execute this function in
+ * the Workqueue (ifs_wq) context.
+ * Wait for the sibling thread to join before the execution.
+ * Execute the scan test by running wrmsr(MSR_ACTIVATE_SCAN).
+ */
+static void ifs_work_func(struct work_struct *work)
+{
+	struct ifs_work *local_work = container_of(work, struct ifs_work, w);
+	int cpu = smp_processor_id();
+	union ifs_scan activate;
+	union ifs_status status;
+	unsigned long timeout;
+	struct ifs_data *ifsd;
+	struct device *dev;
+	int retries;
+	u32 first;
+
+	dev = local_work->dev;
+	ifsd = ifs_get_data(dev);
+
+	activate.rsvd = 0;
+	activate.delay = msec_to_tsc(THREAD_WAIT);
+	activate.sigmce = 0;
+
+	/*
+	 * Need to get (and keep) the threads on this core executing close together
+	 * so that the writes to MSR_ACTIVATE_SCAN below will succeed in entering
+	 * IFS test mode on this core. Interrupts on each thread are expected to be
+	 * brief. But preemption would be a problem.
+	 */
+	preempt_disable();
+
+	/* wait for the sibling threads to join */
+	first = cpumask_first(topology_sibling_cpumask(cpu));
+	if (!wait_for_siblings(dev, ifsd, &siblings_in, NSEC_PER_SEC)) {
+		preempt_enable();
+		dev_err(dev, "cpu %d sibling did not join rendezvous\n", cpu);
+		goto out;
+	}
+
+	activate.start = 0;
+	activate.stop = ifsd->valid_chunks - 1;
+	timeout = jiffies + HZ / 2;
+	retries = MAX_IFS_RETRIES;
+
+	while (activate.start <= activate.stop) {
+		if (time_after(jiffies, timeout)) {
+			status.error_code = IFS_SW_TIMEOUT;
+			break;
+		}
+
+		local_irq_disable();
+		wrmsrl(MSR_ACTIVATE_SCAN, activate.data);
+		local_irq_enable();
+
+		/*
+		 * All logical CPUs on this core are now running IFS test. When it completes
+		 * execution or is interrupted, the following RDMSR gets the scan status.
+		 */
+
+		rdmsrl(MSR_SCAN_STATUS, status.data);
+
+		/* Some cases can be retried, give up for others */
+		if (!can_restart(status))
+			break;
+
+		if (status.chunk_num == activate.start) {
+			/* Check for forward progress */
+			if (retries-- == 0) {
+				if (status.error_code == IFS_NO_ERROR)
+					status.error_code = IFS_SW_PARTIAL_COMPLETION;
+				break;
+			}
+		} else {
+			retries = MAX_IFS_RETRIES;
+			activate.start = status.chunk_num;
+		}
+	}
+
+	preempt_enable();
+
+	if (cpu == first) {
+		/* Update status for this core */
+		ifsd->scan_details = status.data;
+
+		if (status.control_error || status.signature_error) {
+			ifsd->status = SCAN_TEST_FAIL;
+			message_fail(dev, cpu, status);
+		} else if (status.error_code) {
+			ifsd->status = SCAN_NOT_TESTED;
+			message_not_tested(dev, cpu, status);
+		} else {
+			ifsd->status = SCAN_TEST_PASS;
+		}
+	}
+
+	if (!wait_for_siblings(dev, ifsd, &siblings_out, NSEC_PER_SEC))
+		dev_err(dev, "cpu %d sibling did not exit rendezvous\n", cpu);
+
+out:
+	if (cpu == first)
+		complete(&test_thread_done);
+}
+
+/*
+ * Initiate per core test. It wakes up work queue threads on the target cpu and
+ * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and
+ * wait for all sibling threads to finish the scan test.
+ */
+int do_core_test(int cpu, struct device *dev)
+{
+	struct ifs_work *local_work;
+	int sibling;
+	int ret = 0;
+	int i = 0;
+
+	if (!scan_enabled)
+		return -ENXIO;
+
+	cpu_hotplug_disable();
+	if (!cpu_online(cpu)) {
+		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	reinit_completion(&test_thread_done);
+	atomic_set(&siblings_in, 0);
+	atomic_set(&siblings_out, 0);
+
+	cpu_sibl_ct = cpumask_weight(topology_sibling_cpumask(cpu));
+	local_work = kcalloc(cpu_sibl_ct, sizeof(*local_work), GFP_NOWAIT);
+	if (!local_work) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
+		local_work[i].dev = dev;
+		INIT_WORK(&local_work[i].w, ifs_work_func);
+		queue_work_on(sibling, ifs_wq, &local_work[i].w);
+		i++;
+	}
+
+	if (wait_for_completion_timeout(&test_thread_done, HZ) == 0) {
+		dev_err(dev, "cpu %d Core locked up during IFS test? IFS disabled\n", cpu);
+		scan_enabled = false;
+	}
+
+	kfree(local_work);
+out:
+	cpu_hotplug_enable();
+	return ret;
+}
+
+int ifs_setup_wq(void)
+{
+	/* Flags are to keep all the sibling cpu worker threads (of a core) in close sync */
+	ifs_wq = alloc_workqueue("intel_ifs", (WQ_HIGHPRI | WQ_CPU_INTENSIVE), 1);
+	if (!ifs_wq)
+		return -ENOMEM;
+
+	init_completion(&test_thread_done);
+
+	return 0;
+}
+
+void ifs_destroy_wq(void)
+{
+	destroy_workqueue(ifs_wq);
+}