[2/4] Implement H_SET_MODE for ppc64le

Message ID	1458544650-31416-3-git-send-email-bsingharora@gmail.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: Balbir Singh <bsingharora@gmail.com> To: will.deacon@arm.com, kvm@vger.kernel.org Cc: penberg@kernel.org, mpe@ellerman.id.au, mikey@neuling.org, aik@ozlabs.ru, Balbir Singh <bsingharora@gmail.com> Subject: [PATCH 2/4] Implement H_SET_MODE for ppc64le Date: Mon, 21 Mar 2016 18:17:28 +1100 Message-Id: <1458544650-31416-3-git-send-email-bsingharora@gmail.com> In-Reply-To: <1458544650-31416-1-git-send-email-bsingharora@gmail.com> References: <1458544650-31416-1-git-send-email-bsingharora@gmail.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk

Message ID

1458544650-31416-3-git-send-email-bsingharora@gmail.com (mailing list archive)

State

New, archived

Headers

From: Balbir Singh <bsingharora@gmail.com>
To: will.deacon@arm.com, kvm@vger.kernel.org
Cc: penberg@kernel.org, mpe@ellerman.id.au, mikey@neuling.org,
	aik@ozlabs.ru, Balbir Singh <bsingharora@gmail.com>
Subject: [PATCH 2/4] Implement H_SET_MODE for ppc64le
Date: Mon, 21 Mar 2016 18:17:28 +1100
Message-Id: <1458544650-31416-3-git-send-email-bsingharora@gmail.com>
In-Reply-To: <1458544650-31416-1-git-send-email-bsingharora@gmail.com>
References: <1458544650-31416-1-git-send-email-bsingharora@gmail.com>
Sender: kvm-owner@vger.kernel.org
Precedence: bulk

Commit Message

Education Directorate March 21, 2016, 7:17 a.m. UTC

Basic infrastructure for queuing a task to a specific CPU and
the use of that in setting ILE (Little Endian Interrupt Handling)
on power via h_set_mode hypercall

Signed-off-by: Balbir Singh <bsingharora@gmail.com>
---
 include/kvm/kvm-cpu.h              |   7 +++
 include/kvm/kvm.h                  |   1 +
 kvm-cpu.c                          |  50 +++++++++++++++++
 powerpc/include/kvm/kvm-cpu-arch.h |   2 +
 powerpc/kvm.c                      |   2 +-
 powerpc/spapr.h                    |  15 ++++-
 powerpc/spapr_hcall.c              | 111 +++++++++++++++++++++++++++++++++++++
 7 files changed, 185 insertions(+), 3 deletions(-)

Comments

Michael Ellerman March 30, 2016, 5:39 a.m. UTC | #1

Hi Balbir,

So I got this running and it seems to work well.

I have some comments on the implementation though, see below ...

On Mon, 2016-03-21 at 18:17 +1100, Balbir Singh wrote:

> Basic infrastructure for queuing a task to a specifici CPU and
> the use of that in setting ILE (Little Endian Interrupt Handling)
> on power via h_set_mode hypercall
> 
> Signed-off-by: Balbir Singh <bsingharora@gmail.com>
> diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
> index 37155db..731abee 100644
> --- a/include/kvm/kvm.h
> +++ b/include/kvm/kvm.h
> @@ -15,6 +15,7 @@
>  
>  #define SIGKVMEXIT		(SIGRTMIN + 0)
>  #define SIGKVMPAUSE		(SIGRTMIN + 1)
> +#define SIGKVMTASK		(SIGRTMIN + 2)
>  
>  #define KVM_PID_FILE_PATH	"/.lkvm/"
>  #define HOME_DIR		getenv("HOME")
> diff --git a/kvm-cpu.c b/kvm-cpu.c
> index ad4441b..438414f 100644
> --- a/kvm-cpu.c
> +++ b/kvm-cpu.c
> @@ -83,10 +83,59 @@ void kvm_cpu__reboot(struct kvm *kvm)
>  	}
>  }
>  
> +static void kvm_cpu__run_task(int sig, siginfo_t * info, void *context)
> +{
> +	union sigval val;
> +	struct kvm_cpu_task *task_ptr;
> +
> +	if (!info) {
> +		pr_warning("signal queued without info\n");
> +		return;
> +	}
> +
> +	val = info->si_value;
> +	task_ptr = val.sival_ptr;
> +	if (!task_ptr) {
> +		pr_warning("Task queued without data\n");
> +		return;
> +	}
> +
> +	if (!task_ptr->task || !task_ptr->data) {
> +		pr_warning("Failed to get task information\n");
> +		return;
> +	}
> +
> +	task_ptr->task(task_ptr->data);
> +	free(task_ptr);
> +}

I don't think it's safe to do the actual task call from signal context. Rather
it should set a flag that the main loop detects and then runs the task there.

> +int kvm_cpu__queue_task(struct kvm_cpu *cpu, void (*task)(void *data),
> +			void *data)
> +{
> +	struct kvm_cpu_task *task_ptr = NULL;
> +	union sigval val;
> +
> +	task_ptr = malloc(sizeof(struct kvm_cpu_task));
> +	if (!task_ptr)
> +		return -ENOMEM;
> +
> +	task_ptr->task = task;
> +	task_ptr->data = data;
> +	val.sival_ptr = task_ptr;
> +
> +	pthread_sigqueue(cpu->thread, SIGKVMTASK, val);
> +	return 0;
> +}

I think it would be nicer if this interface dealt with waiting for the
response. Rather than the caller having to do it.

Possibly in future we'll want to do an async task, but we can refactor the code
then to skip doing the wait.

> diff --git a/powerpc/include/kvm/kvm-cpu-arch.h b/powerpc/include/kvm/kvm-cpu-arch.h
> index 01eafdf..033b702 100644
> --- a/powerpc/include/kvm/kvm-cpu-arch.h
> +++ b/powerpc/include/kvm/kvm-cpu-arch.h
> @@ -38,6 +38,8 @@
>  
>  #define POWER7_EXT_IRQ	0
>  
> +#define LPCR_ILE (1 << (63-38))
> +
>  struct kvm;
>  
>  struct kvm_cpu {
> diff --git a/powerpc/spapr.h b/powerpc/spapr.h
> index 8b294d1..f851f4a 100644
> --- a/powerpc/spapr.h
> +++ b/powerpc/spapr.h
> @@ -27,7 +27,7 @@ typedef uintptr_t target_phys_addr_t;
>  #define H_HARDWARE	-1	/* Hardware error */
>  #define H_FUNCTION	-2	/* Function not supported */
>  #define H_PARAMETER	-4	/* Parameter invalid, out-of-range or conflicting */
> -
> +#define H_P2		-55
>  #define H_SET_DABR		0x28
>  #define H_LOGICAL_CI_LOAD	0x3c
>  #define H_LOGICAL_CI_STORE	0x40
> @@ -41,7 +41,18 @@ typedef uintptr_t target_phys_addr_t;
>  #define H_EOI			0x64
>  #define H_IPI			0x6c
>  #define H_XIRR			0x74
> -#define MAX_HCALL_OPCODE	H_XIRR
> +#define H_SET_MODE		0x31C
> +#define MAX_HCALL_OPCODE	H_SET_MODE
> +
> +/* Values for 2nd argument to H_SET_MODE */
> +#define H_SET_MODE_RESOURCE_SET_CIABR		1
> +#define H_SET_MODE_RESOURCE_SET_DAWR		2
> +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
> +#define H_SET_MODE_RESOURCE_LE			4
> +
> +/* Flags for H_SET_MODE_RESOURCE_LE */
> +#define H_SET_MODE_ENDIAN_BIG		0
> +#define H_SET_MODE_ENDIAN_LITTLE	1
>  
>  /*
>   * The hcalls above are standardized in PAPR and implemented by pHyp
> diff --git a/powerpc/spapr_hcall.c b/powerpc/spapr_hcall.c
> index ff1d63a..682fad5 100644
> --- a/powerpc/spapr_hcall.c
> +++ b/powerpc/spapr_hcall.c
> @@ -18,6 +18,9 @@
>  
>  #include <stdio.h>
>  #include <assert.h>
> +#include <sys/eventfd.h>
> +
> +static int task_event;
>  
>  static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
>  static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX -
> @@ -74,6 +77,113 @@ static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, ta
>  	return H_SUCCESS;
>  }
>  
> +struct lpcr_data {
> +	struct kvm_cpu	*cpu;
> +	int		mode;
> +};
> +
> +static int get_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)
> +{
> +	struct kvm_one_reg reg = {
> +		.id = KVM_REG_PPC_LPCR_64,
> +		.addr = (__u64)lpcr
> +	};
> +
> +	return ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg);
> +}
> +
> +static int set_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)

This function has a reasonable name ..

> +{
> +	struct kvm_one_reg reg = {
> +		.id = KVM_REG_PPC_LPCR_64,
> +		.addr = (__u64)lpcr
> +	};
> +
> +	return ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, &reg);
> +}
> +
> +static void set_lpcr_cpu(void *data)

But then this one is *very* similar.

I think this should actually be called set_cpu_ile(), because that's what it
does. And maybe have "task" in the name because it's the version for using with
kvm_cpu__queue_task().

> +{
> +	struct lpcr_data *fn_data = (struct lpcr_data *)data;
> +	int ret;
> +	target_ulong lpcr;
> +	u64 task_done = 1;
> +
> +	if (!fn_data || !fn_data->cpu)
> +		return;

These should be hard errors IMHO.

> +	ret = get_cpu_lpcr(fn_data->cpu, &lpcr);
> +	if (ret < 0)
> +		return;

Uh oh!

It looks like most code calls die() if KVM_SET_ONE_REG fails; I think that would
be preferable to running some cpus with a different endianness :)

> +	if (fn_data->mode == H_SET_MODE_ENDIAN_BIG)
> +		lpcr &= ~LPCR_ILE;
> +	else
> +		lpcr |= LPCR_ILE;
> +
> +	ret = set_cpu_lpcr(fn_data->cpu, &lpcr);
> +	if (ret < 0)
> +		return;
> +
> +	free(data);

I don't think we should be doing the free here.

> +	if (write(task_event, &task_done, sizeof(task_done)) < 0)
> +		pr_warning("Failed to notify of lpcr task done\n");
> +}
> +
> +#define for_each_vcpu(cpu, kvm, i) \
> +	for ((i) = 0, (cpu) = (kvm)->cpus[i]; (i) < (kvm)->nrcpus; (i)++, (cpu) = (kvm)->cpus[i])

That should probably be in a header.
>
> +static target_ulong h_set_mode(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
> +{
> +	int ret = H_SUCCESS;

That init should be removed.

> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_cpu *cpu;
> +	int i;
> +
> +	switch (args[1]) {
> +	case H_SET_MODE_RESOURCE_LE: {
> +		u64 total_done = 0;
> +		u64 task_read;
> +
> +		task_event = eventfd(0, 0);
> +		if (task_event < 0) {
> +			pr_warning("Failed to create task_event");
> +			break;

That will return H_SUCCESS which is not OK.

> +		}
> +		for_each_vcpu(cpu, kvm, i) {
> +			struct lpcr_data *data;
> +
> +			data = malloc(sizeof(struct lpcr_data));

Is there any reason not to do this synchronously?

That would allow you to put data on the stack. And also avoid the while loop
below.

> +			if (!data) {
> +				ret = H_P2;
> +				break;
> +			}
> +			data->cpu = cpu;
> +			data->mode = args[0];
> +
> +			kvm_cpu__queue_task(cpu, set_lpcr_cpu, data);
> +		}
> +
> +		while ((int)total_done < kvm->nrcpus) {
> +			int err;
> +			err = read(task_event, &task_read, sizeof(task_read));
> +			if (err < 0) {
> +				ret = H_P2;
> +				break;
> +			}
> +			total_done += task_read;
> +		}
> +		close(task_event);
> +		break;
> +	}
> +	default:
> +		ret = H_FUNCTION;
> +		break;
> +	}
> +	return (ret < 0) ? H_P2 : H_SUCCESS;
> +}

I think that ends up being correct, but it's pretty obscure, i.e. for an
unsupported resource we should return H_P2, and you get that to happen by
setting ret to H_FUNCTION (-2).

cheers

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Education Directorate March 30, 2016, 11:08 a.m. UTC | #2

On 30/03/16 16:39, Michael Ellerman wrote:
> Hi Balbir,
>
> So I got this running and it seems to work well.
>
> I have some comments on the implementation though, see below ...
>
> On Mon, 2016-03-21 at 18:17 +1100, Balbir Singh wrote:
>
>> Basic infrastructure for queuing a task to a specifici CPU and
>> the use of that in setting ILE (Little Endian Interrupt Handling)
>> on power via h_set_mode hypercall
>>
>> Signed-off-by: Balbir Singh <bsingharora@gmail.com>
>> diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
>> index 37155db..731abee 100644
>> --- a/include/kvm/kvm.h
>> +++ b/include/kvm/kvm.h
>> @@ -15,6 +15,7 @@
>>  
>>  #define SIGKVMEXIT		(SIGRTMIN + 0)
>>  #define SIGKVMPAUSE		(SIGRTMIN + 1)
>> +#define SIGKVMTASK		(SIGRTMIN + 2)
>>  
>>  #define KVM_PID_FILE_PATH	"/.lkvm/"
>>  #define HOME_DIR		getenv("HOME")
>> diff --git a/kvm-cpu.c b/kvm-cpu.c
>> index ad4441b..438414f 100644
>> --- a/kvm-cpu.c
>> +++ b/kvm-cpu.c
>> @@ -83,10 +83,59 @@ void kvm_cpu__reboot(struct kvm *kvm)
>>  	}
>>  }
>>  
>> +static void kvm_cpu__run_task(int sig, siginfo_t * info, void *context)
>> +{
>> +	union sigval val;
>> +	struct kvm_cpu_task *task_ptr;
>> +
>> +	if (!info) {
>> +		pr_warning("signal queued without info\n");
>> +		return;
>> +	}
>> +
>> +	val = info->si_value;
>> +	task_ptr = val.sival_ptr;
>> +	if (!task_ptr) {
>> +		pr_warning("Task queued without data\n");
>> +		return;
>> +	}
>> +
>> +	if (!task_ptr->task || !task_ptr->data) {
>> +		pr_warning("Failed to get task information\n");
>> +		return;
>> +	}
>> +
>> +	task_ptr->task(task_ptr->data);
>> +	free(task_ptr);
>> +}
> I don't think it's safe to do the actual task call from signal context. Rather
> it should set a flag that the main loop detects and then runs the task there.
I don't think it matters. We do other stuff like kvm__pause() from signal context.
The problem I see with the main loop detection is that we could potentially have
both modes queued up.

>> +int kvm_cpu__queue_task(struct kvm_cpu *cpu, void (*task)(void *data),
>> +			void *data)
>> +{
>> +	struct kvm_cpu_task *task_ptr = NULL;
>> +	union sigval val;
>> +
>> +	task_ptr = malloc(sizeof(struct kvm_cpu_task));
>> +	if (!task_ptr)
>> +		return -ENOMEM;
>> +
>> +	task_ptr->task = task;
>> +	task_ptr->data = data;
>> +	val.sival_ptr = task_ptr;
>> +
>> +	pthread_sigqueue(cpu->thread, SIGKVMTASK, val);
>> +	return 0;
>> +}
> I think it would be nicer if this interface dealt with waiting for the
> response. Rather than the caller having to do it.
>
> Possibly in future we'll want to do an async task, but we can refactor the code
> then to skip doing the wait.
I wrote the core code (the powerpc-independent bits) to be async, with our code
serving as an example of how to do sync.
>> diff --git a/powerpc/include/kvm/kvm-cpu-arch.h b/powerpc/include/kvm/kvm-cpu-arch.h
>> index 01eafdf..033b702 100644
>> --- a/powerpc/include/kvm/kvm-cpu-arch.h
>> +++ b/powerpc/include/kvm/kvm-cpu-arch.h
>> @@ -38,6 +38,8 @@
>>  
>>  #define POWER7_EXT_IRQ	0
>>  
>> +#define LPCR_ILE (1 << (63-38))
>> +
>>  struct kvm;
>>  
>>  struct kvm_cpu {
>> diff --git a/powerpc/spapr.h b/powerpc/spapr.h
>> index 8b294d1..f851f4a 100644
>> --- a/powerpc/spapr.h
>> +++ b/powerpc/spapr.h
>> @@ -27,7 +27,7 @@ typedef uintptr_t target_phys_addr_t;
>>  #define H_HARDWARE	-1	/* Hardware error */
>>  #define H_FUNCTION	-2	/* Function not supported */
>>  #define H_PARAMETER	-4	/* Parameter invalid, out-of-range or conflicting */
>> -
>> +#define H_P2		-55
>>  #define H_SET_DABR		0x28
>>  #define H_LOGICAL_CI_LOAD	0x3c
>>  #define H_LOGICAL_CI_STORE	0x40
>> @@ -41,7 +41,18 @@ typedef uintptr_t target_phys_addr_t;
>>  #define H_EOI			0x64
>>  #define H_IPI			0x6c
>>  #define H_XIRR			0x74
>> -#define MAX_HCALL_OPCODE	H_XIRR
>> +#define H_SET_MODE		0x31C
>> +#define MAX_HCALL_OPCODE	H_SET_MODE
>> +
>> +/* Values for 2nd argument to H_SET_MODE */
>> +#define H_SET_MODE_RESOURCE_SET_CIABR		1
>> +#define H_SET_MODE_RESOURCE_SET_DAWR		2
>> +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
>> +#define H_SET_MODE_RESOURCE_LE			4
>> +
>> +/* Flags for H_SET_MODE_RESOURCE_LE */
>> +#define H_SET_MODE_ENDIAN_BIG		0
>> +#define H_SET_MODE_ENDIAN_LITTLE	1
>>  
>>  /*
>>   * The hcalls above are standardized in PAPR and implemented by pHyp
>> diff --git a/powerpc/spapr_hcall.c b/powerpc/spapr_hcall.c
>> index ff1d63a..682fad5 100644
>> --- a/powerpc/spapr_hcall.c
>> +++ b/powerpc/spapr_hcall.c
>> @@ -18,6 +18,9 @@
>>  
>>  #include <stdio.h>
>>  #include <assert.h>
>> +#include <sys/eventfd.h>
>> +
>> +static int task_event;
>>  
>>  static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
>>  static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX -
>> @@ -74,6 +77,113 @@ static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, ta
>>  	return H_SUCCESS;
>>  }
>>  
>> +struct lpcr_data {
>> +	struct kvm_cpu	*cpu;
>> +	int		mode;
>> +};
>> +
>> +static int get_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)
>> +{
>> +	struct kvm_one_reg reg = {
>> +		.id = KVM_REG_PPC_LPCR_64,
>> +		.addr = (__u64)lpcr
>> +	};
>> +
>> +	return ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg);
>> +}
>> +
>> +static int set_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)
> This function has a reasonable name ..
>
>> +{
>> +	struct kvm_one_reg reg = {
>> +		.id = KVM_REG_PPC_LPCR_64,
>> +		.addr = (__u64)lpcr
>> +	};
>> +
>> +	return ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, &reg);
>> +}
>> +
>> +static void set_lpcr_cpu(void *data)
> But then this one is *very* similar.
>
> I think this should actually be called set_cpu_ile(), because that's what it
> does. And maybe have "task" in the name because it's the version for using with
> kvm_cpu__queue_task().
I'll change this to set_cpu_ile -- good catch
>
>> +{
>> +	struct lpcr_data *fn_data = (struct lpcr_data *)data;
>> +	int ret;
>> +	target_ulong lpcr;
>> +	u64 task_done = 1;
>> +
>> +	if (!fn_data || !fn_data->cpu)
>> +		return;
> This should be hard errors IMHO.
I wanted to avoid hard errors to see if the OS can fail with an OOPS
later. My concern is that hard errors always leave the system in a very
bad state with no scope to debug. I can change that if required
>> +	ret = get_cpu_lpcr(fn_data->cpu, &lpcr);
>> +	if (ret < 0)
>> +		return;
> Uh oh!
>
> It looks like most code calls die() if KVM_SET_ONE_REG fails, that would be
> preferable I think than running some cpus with a different endian :)

>> +	if (fn_data->mode == H_SET_MODE_ENDIAN_BIG)
>> +		lpcr &= ~LPCR_ILE;
>> +	else
>> +		lpcr |= LPCR_ILE;
>> +
>> +	ret = set_cpu_lpcr(fn_data->cpu, &lpcr);
>> +	if (ret < 0)
>> +		return;
>> +
>> +	free(data);
> I don't think we should be doing the free here.
From the point the callback gets the data, it owns it. Otherwise we'd have to
implement an "I am done with this processing" notification.

>> +	if (write(task_event, &task_done, sizeof(task_done)) < 0)
>> +		pr_warning("Failed to notify of lpcr task done\n");
>> +}
>> +
>> +#define for_each_vcpu(cpu, kvm, i) \
>> +	for ((i) = 0, (cpu) = (kvm)->cpus[i]; (i) < (kvm)->nrcpus; (i)++, (cpu) = (kvm)->cpus[i])
> That should probably be in a header.
Yep, I did not see any use outside this function, but I can move it
>> +static target_ulong h_set_mode(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
>> +{
>> +	int ret = H_SUCCESS;
> That init should be removed.
OK, I'll revisit the error handling
>> +	struct kvm *kvm = vcpu->kvm;
>> +	struct kvm_cpu *cpu;
>> +	int i;
>> +
>> +	switch (args[1]) {
>> +	case H_SET_MODE_RESOURCE_LE: {
>> +		u64 total_done = 0;
>> +		u64 task_read;
>> +
>> +		task_event = eventfd(0, 0);
>> +		if (task_event < 0) {
>> +			pr_warning("Failed to create task_event");
>> +			break;
> That will return H_SUCCESS which is not OK.
Good catch!
>> +		}
>> +		for_each_vcpu(cpu, kvm, i) {
>> +			struct lpcr_data *data;
>> +
>> +			data = malloc(sizeof(struct lpcr_data));
> Is there any reason not to do this synchronously?
>
> That would allow you to put data on the stack. And also avoid the while loop
> below.
Synchronous implies a two-way communication mechanism between this vCPU
and all others to communicate the beginning and end of a task.
>
>> +			if (!data) {
>> +				ret = H_P2;
>> +				break;
>> +			}
>> +			data->cpu = cpu;
>> +			data->mode = args[0];
>> +
>> +			kvm_cpu__queue_task(cpu, set_lpcr_cpu, data);
>> +		}
>> +
>> +		while ((int)total_done < kvm->nrcpus) {
>> +			int err;
>> +			err = read(task_event, &task_read, sizeof(task_read));
>> +			if (err < 0) {
>> +				ret = H_P2;
>> +				break;
>> +			}
>> +			total_done += task_read;
>> +		}
>> +		close(task_event);
>> +		break;
>> +	}
>> +	default:
>> +		ret = H_FUNCTION;
>> +		break;
>> +	}
>> +	return (ret < 0) ? H_P2 : H_SUCCESS;
>> +}
> I think that ends up being correct, but it's pretty obscure. ie. for an
> unsupported resource we should return H_P2, and you get that to happen by
> setting ret to H_FUNCTION (-2).
Good catch! I'll fix this
>
> cheers
>

Thanks for the review
Balbir
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

diff --git a/include/kvm/kvm-cpu.h b/include/kvm/kvm-cpu.h
index aa0cb54..5009681 100644
--- a/include/kvm/kvm-cpu.h
+++ b/include/kvm/kvm-cpu.h
@@ -4,6 +4,11 @@ 
 #include "kvm/kvm-cpu-arch.h"
 #include <stdbool.h>
 
+struct kvm_cpu_task {
+	void (*task)(void *data);
+	void *data;
+};
+
 int kvm_cpu__init(struct kvm *kvm);
 int kvm_cpu__exit(struct kvm *kvm);
 struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id);
@@ -23,5 +28,7 @@  void kvm_cpu__show_code(struct kvm_cpu *vcpu);
 void kvm_cpu__show_registers(struct kvm_cpu *vcpu);
 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu);
 void kvm_cpu__arch_nmi(struct kvm_cpu *cpu);
+int kvm_cpu__queue_task(struct kvm_cpu *cpu, void (*task)(void *data),
+			void *data);
 
 #endif /* KVM__KVM_CPU_H */
diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
index 37155db..731abee 100644
--- a/include/kvm/kvm.h
+++ b/include/kvm/kvm.h
@@ -15,6 +15,7 @@ 
 
 #define SIGKVMEXIT		(SIGRTMIN + 0)
 #define SIGKVMPAUSE		(SIGRTMIN + 1)
+#define SIGKVMTASK		(SIGRTMIN + 2)
 
 #define KVM_PID_FILE_PATH	"/.lkvm/"
 #define HOME_DIR		getenv("HOME")
diff --git a/kvm-cpu.c b/kvm-cpu.c
index ad4441b..438414f 100644
--- a/kvm-cpu.c
+++ b/kvm-cpu.c
@@ -83,10 +83,59 @@  void kvm_cpu__reboot(struct kvm *kvm)
 	}
 }
 
+static void kvm_cpu__run_task(int sig, siginfo_t * info, void *context)
+{
+	union sigval val;
+	struct kvm_cpu_task *task_ptr;
+
+	if (!info) {
+		pr_warning("signal queued without info\n");
+		return;
+	}
+
+	val = info->si_value;
+	task_ptr = val.sival_ptr;
+	if (!task_ptr) {
+		pr_warning("Task queued without data\n");
+		return;
+	}
+
+	if (!task_ptr->task || !task_ptr->data) {
+		pr_warning("Failed to get task information\n");
+		return;
+	}
+
+	task_ptr->task(task_ptr->data);
+	free(task_ptr);
+}
+
+int kvm_cpu__queue_task(struct kvm_cpu *cpu, void (*task)(void *data),
+			void *data)
+{
+	struct kvm_cpu_task *task_ptr = NULL;
+	union sigval val;
+
+	task_ptr = malloc(sizeof(struct kvm_cpu_task));
+	if (!task_ptr)
+		return -ENOMEM;
+
+	task_ptr->task = task;
+	task_ptr->data = data;
+	val.sival_ptr = task_ptr;
+
+	pthread_sigqueue(cpu->thread, SIGKVMTASK, val);
+	return 0;
+}
+
 int kvm_cpu__start(struct kvm_cpu *cpu)
 {
 	sigset_t sigset;
 
+	struct sigaction action = {
+		.sa_sigaction = kvm_cpu__run_task,
+		.sa_flags = SA_SIGINFO,
+	};
+
 	sigemptyset(&sigset);
 	sigaddset(&sigset, SIGALRM);
 
@@ -94,6 +143,7 @@  int kvm_cpu__start(struct kvm_cpu *cpu)
 
 	signal(SIGKVMEXIT, kvm_cpu_signal_handler);
 	signal(SIGKVMPAUSE, kvm_cpu_signal_handler);
+	sigaction(SIGKVMTASK, &action, NULL);
 
 	kvm_cpu__reset_vcpu(cpu);
 
diff --git a/powerpc/include/kvm/kvm-cpu-arch.h b/powerpc/include/kvm/kvm-cpu-arch.h
index 01eafdf..033b702 100644
--- a/powerpc/include/kvm/kvm-cpu-arch.h
+++ b/powerpc/include/kvm/kvm-cpu-arch.h
@@ -38,6 +38,8 @@ 
 
 #define POWER7_EXT_IRQ	0
 
+#define LPCR_ILE (1 << (63-38))
+
 struct kvm;
 
 struct kvm_cpu {
diff --git a/powerpc/kvm.c b/powerpc/kvm.c
index d147e0c..2dbd0fe 100644
--- a/powerpc/kvm.c
+++ b/powerpc/kvm.c
@@ -286,7 +286,7 @@  static int setup_fdt(struct kvm *kvm)
 	uint32_t	int_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
 	char 		hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
 		"hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
-		"hcall-splpar\0hcall-bulk";
+		"hcall-splpar\0hcall-bulk\0hcall-set-mode";
 	int 		i, j;
 	char 		cpu_name[30];
 	u8		staging_fdt[FDT_MAX_SIZE];
diff --git a/powerpc/spapr.h b/powerpc/spapr.h
index 8b294d1..f851f4a 100644
--- a/powerpc/spapr.h
+++ b/powerpc/spapr.h
@@ -27,7 +27,7 @@  typedef uintptr_t target_phys_addr_t;
 #define H_HARDWARE	-1	/* Hardware error */
 #define H_FUNCTION	-2	/* Function not supported */
 #define H_PARAMETER	-4	/* Parameter invalid, out-of-range or conflicting */
-
+#define H_P2		-55
 #define H_SET_DABR		0x28
 #define H_LOGICAL_CI_LOAD	0x3c
 #define H_LOGICAL_CI_STORE	0x40
@@ -41,7 +41,18 @@  typedef uintptr_t target_phys_addr_t;
 #define H_EOI			0x64
 #define H_IPI			0x6c
 #define H_XIRR			0x74
-#define MAX_HCALL_OPCODE	H_XIRR
+#define H_SET_MODE		0x31C
+#define MAX_HCALL_OPCODE	H_SET_MODE
+
+/* Values for 2nd argument to H_SET_MODE */
+#define H_SET_MODE_RESOURCE_SET_CIABR		1
+#define H_SET_MODE_RESOURCE_SET_DAWR		2
+#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
+#define H_SET_MODE_RESOURCE_LE			4
+
+/* Flags for H_SET_MODE_RESOURCE_LE */
+#define H_SET_MODE_ENDIAN_BIG		0
+#define H_SET_MODE_ENDIAN_LITTLE	1
 
 /*
  * The hcalls above are standardized in PAPR and implemented by pHyp
diff --git a/powerpc/spapr_hcall.c b/powerpc/spapr_hcall.c
index ff1d63a..682fad5 100644
--- a/powerpc/spapr_hcall.c
+++ b/powerpc/spapr_hcall.c
@@ -18,6 +18,9 @@ 
 
 #include <stdio.h>
 #include <assert.h>
+#include <sys/eventfd.h>
+
+static int task_event;
 
 static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
 static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX -
@@ -74,6 +77,113 @@  static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, ta
 	return H_SUCCESS;
 }
 
+struct lpcr_data {
+	struct kvm_cpu	*cpu;
+	int		mode;
+};
+
+static int get_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)
+{
+	struct kvm_one_reg reg = {
+		.id = KVM_REG_PPC_LPCR_64,
+		.addr = (__u64)lpcr
+	};
+
+	return ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg);
+}
+
+static int set_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)
+{
+	struct kvm_one_reg reg = {
+		.id = KVM_REG_PPC_LPCR_64,
+		.addr = (__u64)lpcr
+	};
+
+	return ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, &reg);
+}
+
+static void set_lpcr_cpu(void *data)
+{
+	struct lpcr_data *fn_data = (struct lpcr_data *)data;
+	int ret;
+	target_ulong lpcr;
+	u64 task_done = 1;
+
+	if (!fn_data || !fn_data->cpu)
+		return;
+
+	ret = get_cpu_lpcr(fn_data->cpu, &lpcr);
+	if (ret < 0)
+		return;
+
+	if (fn_data->mode == H_SET_MODE_ENDIAN_BIG)
+		lpcr &= ~LPCR_ILE;
+	else
+		lpcr |= LPCR_ILE;
+
+	ret = set_cpu_lpcr(fn_data->cpu, &lpcr);
+	if (ret < 0)
+		return;
+
+	free(data);
+	if (write(task_event, &task_done, sizeof(task_done)) < 0)
+		pr_warning("Failed to notify of lpcr task done\n");
+}
+
+#define for_each_vcpu(cpu, kvm, i) \
+	for ((i) = 0, (cpu) = (kvm)->cpus[i]; (i) < (kvm)->nrcpus; (i)++, (cpu) = (kvm)->cpus[i])
+
+static target_ulong h_set_mode(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	int ret = H_SUCCESS;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_cpu *cpu;
+	int i;
+
+	switch (args[1]) {
+	case H_SET_MODE_RESOURCE_LE: {
+		u64 total_done = 0;
+		u64 task_read;
+
+		task_event = eventfd(0, 0);
+		if (task_event < 0) {
+			pr_warning("Failed to create task_event");
+			break;
+		}
+		for_each_vcpu(cpu, kvm, i) {
+			struct lpcr_data *data;
+
+			data = malloc(sizeof(struct lpcr_data));
+			if (!data) {
+				ret = H_P2;
+				break;
+			}
+			data->cpu = cpu;
+			data->mode = args[0];
+
+			kvm_cpu__queue_task(cpu, set_lpcr_cpu, data);
+		}
+
+		while ((int)total_done < kvm->nrcpus) {
+			int err;
+			err = read(task_event, &task_read, sizeof(task_read));
+			if (err < 0) {
+				ret = H_P2;
+				break;
+			}
+			total_done += task_read;
+		}
+		close(task_event);
+		break;
+	}
+	default:
+		ret = H_FUNCTION;
+		break;
+	}
+	return (ret < 0) ? H_P2 : H_SUCCESS;
+}
+
+
 void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
 {
 	spapr_hcall_fn *slot;
@@ -128,6 +238,7 @@  void hypercall_init(void)
 	spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store);
 	spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi);
 	spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf);
+	spapr_register_hypercall(H_SET_MODE, h_set_mode);
 
 	/* KVM-PPC specific hcalls */
 	spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);

[2/4] Implement H_SET_MODE for ppc64le

Commit Message

Comments

Patch