
[v8,09/10] drivers: qcom: rpmh: add support for batch RPMH request

Message ID 20180509170159.29682-10-ilina@codeaurora.org (mailing list archive)
State Not Applicable, archived
Delegated to: Andy Gross

Commit Message

Lina Iyer May 9, 2018, 5:01 p.m. UTC
Platform drivers need to make a lot of resource state requests at the same
time, say, at the start or end of a use case. It can be quite
inefficient to send each request separately. Instead, they can give the
RPMH library a batch of requests to be sent and wait for the whole
transaction to complete.

rpmh_write_batch() is a blocking call that can be used to send multiple
RPMH command sets. Each RPMH command set is sent asynchronously, and the
API blocks until all the command sets are complete and have received their
tx_done callbacks.
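
For example (illustrative only; the addresses and data below are made up):

	struct tcs_cmd cmd[] = {
		{ .addr = 0x1234, .data = 0x1 },
		{ .addr = 0x5678, .data = 0x2 },
		{ .addr = 0x9abc, .data = 0x3 },
	};
	u32 n[] = { 2, 1, 0 };	/* two command sets, 0-terminated */

	ret = rpmh_write_batch(dev, RPMH_ACTIVE_ONLY_STATE, cmd, n);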

Signed-off-by: Lina Iyer <ilina@codeaurora.org>
---

Changes in v7:
	- Check for loop out of bounds

Changes in v6:
	- replace rpmh_client with device *
Changes in v4:
	- reorganize rpmh_write_batch()
	- introduce wait_count here, instead of patch#4
---
 drivers/soc/qcom/rpmh.c | 154 +++++++++++++++++++++++++++++++++++++++-
 include/soc/qcom/rpmh.h |   8 +++
 2 files changed, 160 insertions(+), 2 deletions(-)

Comments

Matthias Kaehlcke May 9, 2018, 10:03 p.m. UTC | #1
On Wed, May 09, 2018 at 11:01:58AM -0600, Lina Iyer wrote:
> Platform drivers need make a lot of resource state requests at the same
> time, say, at the start or end of an usecase. It can be quite
> inefficient to send each request separately. Instead they can give the
> RPMH library a batch of requests to be sent and wait on the whole
> transaction to be complete.
> 
> rpmh_write_batch() is a blocking call that can be used to send multiple
> RPMH command sets. Each RPMH command set is set asynchronously and the
> API blocks until all the command sets are complete and receive their
> tx_done callbacks.
> 
> Signed-off-by: Lina Iyer <ilina@codeaurora.org>
> ---
> 
> Changes in v7:
> 	- Check for loop out of bounds
> 
> Changes in v6:
> 	- replace rpmh_client with device *
> Changes in v4:
> 	- reorganize rpmh_write_batch()
> 	- introduce wait_count here, instead of patch#4
> ---
>  drivers/soc/qcom/rpmh.c | 154 +++++++++++++++++++++++++++++++++++++++-
>  include/soc/qcom/rpmh.h |   8 +++
>  2 files changed, 160 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
> index 1bb62876795c..a0e277b4b846 100644
> --- a/drivers/soc/qcom/rpmh.c
> +++ b/drivers/soc/qcom/rpmh.c
>
> ...
>
> +static int flush_batch(struct rpmh_ctrlr *ctrlr)
> +{
> +	const struct rpmh_request *rpm_msg;
> +	unsigned long flags;
> +	int ret = 0;
> +	int i;
> +
> +	/* Send Sleep/Wake requests to the controller, expect no response */
> +	spin_lock_irqsave(&ctrlr->lock, flags);
> +	for (i = 0; ctrlr->batch_cache[i]; i++) {

I missed this earlier: the loop goes beyond ctrlr->batch_cache when
the batch cache is full.

> +		rpm_msg = ctrlr->batch_cache[i];
> +		ret = rpmh_rsc_write_ctrl_data(ctrlr->drv, &rpm_msg->msg);
> +		if (ret)
> +			break;
> +	}
> +	spin_unlock_irqrestore(&ctrlr->lock, flags);
> +
> +	return ret;
> +}
> +
> +static void invalidate_batch(struct rpmh_ctrlr *ctrlr)
> +{
> +	unsigned long flags;
> +	int index = 0;
> +
> +	spin_lock_irqsave(&ctrlr->lock, flags);
> +	while (ctrlr->batch_cache[index]) {

This still goes beyond ctrlr->batch_cache when the batch cache is
full.
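
Bounding the index in both loops would avoid that; a minimal untested
sketch of what I mean:

	for (i = 0; i < RPMH_MAX_BATCH_CACHE && ctrlr->batch_cache[i]; i++)

and

	while (index < RPMH_MAX_BATCH_CACHE && ctrlr->batch_cache[index])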
Lina Iyer May 10, 2018, 3:17 p.m. UTC | #2
On Wed, May 09 2018 at 16:03 -0600, Matthias Kaehlcke wrote:
>On Wed, May 09, 2018 at 11:01:58AM -0600, Lina Iyer wrote:
>> Platform drivers need make a lot of resource state requests at the same
>> time, say, at the start or end of an usecase. It can be quite
>> inefficient to send each request separately. Instead they can give the
>> RPMH library a batch of requests to be sent and wait on the whole
>> transaction to be complete.
>>
>> rpmh_write_batch() is a blocking call that can be used to send multiple
>> RPMH command sets. Each RPMH command set is set asynchronously and the
>> API blocks until all the command sets are complete and receive their
>> tx_done callbacks.
>>
>> Signed-off-by: Lina Iyer <ilina@codeaurora.org>
>> ---
>>
>> Changes in v7:
>> 	- Check for loop out of bounds
>>
>> Changes in v6:
>> 	- replace rpmh_client with device *
>> Changes in v4:
>> 	- reorganize rpmh_write_batch()
>> 	- introduce wait_count here, instead of patch#4
>> ---
>>  drivers/soc/qcom/rpmh.c | 154 +++++++++++++++++++++++++++++++++++++++-
>>  include/soc/qcom/rpmh.h |   8 +++
>>  2 files changed, 160 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
>> index 1bb62876795c..a0e277b4b846 100644
>> --- a/drivers/soc/qcom/rpmh.c
>> +++ b/drivers/soc/qcom/rpmh.c
>>
>> ...
>>
>> +static int flush_batch(struct rpmh_ctrlr *ctrlr)
>> +{
>> +	const struct rpmh_request *rpm_msg;
>> +	unsigned long flags;
>> +	int ret = 0;
>> +	int i;
>> +
>> +	/* Send Sleep/Wake requests to the controller, expect no response */
>> +	spin_lock_irqsave(&ctrlr->lock, flags);
>> +	for (i = 0; ctrlr->batch_cache[i]; i++) {
>
>I missed this earlier: the loop goes beyond ctrlr->batch_cache when
>the batch cache is full.
>
>> +		rpm_msg = ctrlr->batch_cache[i];
>> +		ret = rpmh_rsc_write_ctrl_data(ctrlr->drv, &rpm_msg->msg);
>> +		if (ret)
>> +			break;
>> +	}
>> +	spin_unlock_irqrestore(&ctrlr->lock, flags);
>> +
>> +	return ret;
>> +}
>> +
>> +static void invalidate_batch(struct rpmh_ctrlr *ctrlr)
>> +{
>> +	unsigned long flags;
>> +	int index = 0;
>> +
>> +	spin_lock_irqsave(&ctrlr->lock, flags);
>> +	while (ctrlr->batch_cache[index]) {
>
>This still goes beyond ctrlr->batch_cache when the batch cache is
>full.
I will check through the code for all out-of-bounds accesses. It seems I
have not worried about that too much. Well, the space assigned here is beyond
what we see on a production device. Nevertheless, it needs to be checked.

Thanks for your review Matthias.

-- Lina

Douglas Anderson May 11, 2018, 8:19 p.m. UTC | #3
Hi,

On Wed, May 9, 2018 at 10:01 AM, Lina Iyer <ilina@codeaurora.org> wrote:
>  /**
> @@ -77,12 +82,14 @@ struct rpmh_request {
>   * @cache: the list of cached requests
>   * @lock: synchronize access to the controller data
>   * @dirty: was the cache updated since flush
> + * @batch_cache: Cache sleep and wake requests sent as batch
>   */
>  struct rpmh_ctrlr {
>         struct rsc_drv *drv;
>         struct list_head cache;
>         spinlock_t lock;
>         bool dirty;
> +       const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];

I'm pretty confused about why the "batch_cache" is separate from the
normal cache.  As far as I can tell the purpose of the two is the same
but you have two totally separate code paths and data structures.


>  };
>
>  static struct rpmh_ctrlr rpmh_rsc[RPMH_MAX_CTRLR];
> @@ -133,6 +140,7 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
>         struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
>                                                     msg);
>         struct completion *compl = rpm_msg->completion;
> +       atomic_t *wc = rpm_msg->wait_count;
>
>         rpm_msg->err = r;
>
> @@ -143,8 +151,13 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
>         kfree(rpm_msg->free);
>
>         /* Signal the blocking thread we are done */
> -       if (compl)
> -               complete(compl);
> +       if (!compl)
> +               return;

The comment above this "if" block no longer applies to the line next
to it after your patch.  ...but below I suggest you get rid of
"wait_count", so maybe this part of the patch will go away.


> +static int cache_batch(struct rpmh_ctrlr *ctrlr,
> +                      struct rpmh_request **rpm_msg, int count)
> +{
> +       unsigned long flags;
> +       int ret = 0;
> +       int index = 0;
> +       int i;
> +
> +       spin_lock_irqsave(&ctrlr->lock, flags);
> +       while (index < RPMH_MAX_BATCH_CACHE && ctrlr->batch_cache[index])
> +               index++;
> +       if (index + count >= RPMH_MAX_BATCH_CACHE) {
> +               ret = -ENOMEM;
> +               goto fail;
> +       }
> +
> +       for (i = 0; i < count; i++)
> +               ctrlr->batch_cache[index + i] = rpm_msg[i];
> +fail:

Nit: this label is for both failure and normal exit, so call it "exit".


> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
> +
> +       return ret;
> +}

As part of my overall confusion about why the batch cache is different
than the normal one: for the normal use case you still call
rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
don't for the batch cache.  I still haven't totally figured out what
rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
do it for the batch cache but you do for the other one.


> +/**
> + * rpmh_write_batch: Write multiple sets of RPMH commands and wait for the
> + * batch to finish.
> + *
> + * @dev: the device making the request
> + * @state: Active/sleep set
> + * @cmd: The payload data
> + * @n: The array of count of elements in each batch, 0 terminated.
> + *
> + * Write a request to the RSC controller without caching. If the request
> + * state is ACTIVE, then the requests are treated as completion request
> + * and sent to the controller immediately. The function waits until all the
> + * commands are complete. If the request was to SLEEP or WAKE_ONLY, then the
> + * request is sent as fire-n-forget and no ack is expected.
> + *
> + * May sleep. Do not call from atomic contexts for ACTIVE_ONLY requests.
> + */
> +int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
> +                    const struct tcs_cmd *cmd, u32 *n)
> +{
> +       struct rpmh_request *rpm_msg[RPMH_MAX_REQ_IN_BATCH] = { NULL };
> +       DECLARE_COMPLETION_ONSTACK(compl);
> +       atomic_t wait_count = ATOMIC_INIT(0);
> +       struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
> +       int count = 0;
> +       int ret, i;
> +
> +       if (IS_ERR(ctrlr) || !cmd || !n)
> +               return -EINVAL;
> +
> +       while (n[count++] > 0)
> +               ;
> +       count--;
> +       if (!count || count > RPMH_MAX_REQ_IN_BATCH)
> +               return -EINVAL;
> +
> +       for (i = 0; i < count; i++) {
> +               rpm_msg[i] = __get_rpmh_msg_async(state, cmd, n[i]);
> +               if (IS_ERR_OR_NULL(rpm_msg[i])) {

Just "IS_ERR".  It's never NULL.

...also add a i-- somewhere in here or you're going to be kfree()ing
your error value, aren't you?
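
Something along these lines (untested sketch) would skip the error slot:

	ret = PTR_ERR(rpm_msg[i]);
	for (i--; i >= 0; i--)
		kfree(rpm_msg[i]->free);
	return ret;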


> +                       ret = PTR_ERR(rpm_msg[i]);
> +                       for (; i >= 0; i--)
> +                               kfree(rpm_msg[i]->free);
> +                       return ret;
> +               }
> +               cmd += n[i];
> +       }
> +
> +       if (state != RPMH_ACTIVE_ONLY_STATE)
> +               return cache_batch(ctrlr, rpm_msg, count);

Don't you need to free rpm_msg items in this case?


> +
> +       atomic_set(&wait_count, count);
> +
> +       for (i = 0; i < count; i++) {
> +               rpm_msg[i]->completion = &compl;
> +               rpm_msg[i]->wait_count = &wait_count;
> +               ret = rpmh_rsc_send_data(ctrlr->drv, &rpm_msg[i]->msg);
> +               if (ret) {
> +                       int j;
> +
> +                       pr_err("Error(%d) sending RPMH message addr=%#x\n",
> +                              ret, rpm_msg[i]->msg.cmds[0].addr);
> +                       for (j = i; j < count; j++)
> +                               rpmh_tx_done(&rpm_msg[j]->msg, ret);

You're just using rpmh_tx_done() to free memory?  Note that you'll
probably do your error handling in this function a favor if you rename
__get_rpmh_msg_async() to __fill_rpmh_msg() and remove the memory
allocation from there.  Then you can do one big allocation of the
whole array in rpmh_write_batch() and then you'll only have one free
at the end...
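
Very roughly (untested; __fill_rpmh_msg() here is a hypothetical variant
of __get_rpmh_msg_async() that fills caller-provided storage):

	rpm_msgs = kcalloc(count, sizeof(*rpm_msgs), GFP_KERNEL);
	if (!rpm_msgs)
		return -ENOMEM;

	for (i = 0; i < count; i++) {
		__fill_rpmh_msg(&rpm_msgs[i], state, cmd, n[i]);
		cmd += n[i];
	}

	/* ... send the messages and wait ... */

	kfree(rpm_msgs);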



> +                       break;

"break" seems wrong here.  You'll end up waiting for the completion,
then I guess timing out, then returning -ETIMEDOUT?


> +               }
> +       }
> +
> +       ret = wait_for_completion_timeout(&compl, RPMH_TIMEOUT_MS);

The "wait_count" abstraction is confusing and I believe it's not
needed.  I think you can remove it and change the above to this
(untested) code:

time_left = RPMH_TIMEOUT_MS;
for (i = 0; i < count; i++) {
  time_left = wait_for_completion_timeout(&compl, time_left);
  if (!time_left)
    return -ETIMEDOUT;
}

...specifically completions are additive, so just wait "count" times
and then the reader doesn't need to learn your new wait_count
abstraction and try to reason about it.

...and, actually, I argue in other replies that this shouldn't use a
timeout, so even cleaner:

for (i = 0; i < count; i++)
  wait_for_completion(&compl);


Once you do that, you can also get rid of the need to pre-count "n",
so all your loops turn into:

for (i = 0; n[i]; i++)


I suppose you might want to get rid of "RPMH_MAX_REQ_IN_BATCH" and
dynamically allocate your array too, but that seems sane.  As per
above it seems like you should just dynamically allocate a whole array
of "struct rpmh_request" items at once anyway.

---

> +       return (ret > 0) ? 0 : -ETIMEDOUT;
> +
> +}
> +EXPORT_SYMBOL(rpmh_write_batch);

Perhaps an even simpler thing than taking all my advice above: can't
you just add an optional completion to rpmh_write_async()?  That would
just be stuffed into rpm_msg.

Now your batch code would just be a bunch of calls to
rpmh_write_async() with an equal number of wait_for_completion() calls
at the end.  Is there a reason that wouldn't work?  You'd get rid of
_a lot_ of code.
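
Roughly something like this (untested; assumes rpmh_write_async() grew an
optional completion parameter, which it doesn't have in this series):

	DECLARE_COMPLETION_ONSTACK(compl);
	int sent = 0;

	for (i = 0; n[i]; i++) {
		ret = rpmh_write_async(dev, state, cmd, n[i], &compl);
		if (ret)
			break;
		sent++;
		cmd += n[i];
	}

	/* wait only for the requests that were actually sent */
	for (i = 0; i < sent; i++)
		wait_for_completion(&compl);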


-Doug
Lina Iyer May 14, 2018, 7:59 p.m. UTC | #4
Hi Doug,

Will explain only the key points now.

On Fri, May 11 2018 at 14:19 -0600, Doug Anderson wrote:
>Hi,
>
>On Wed, May 9, 2018 at 10:01 AM, Lina Iyer <ilina@codeaurora.org> wrote:
>>  /**
>> @@ -77,12 +82,14 @@ struct rpmh_request {
>>   * @cache: the list of cached requests
>>   * @lock: synchronize access to the controller data
>>   * @dirty: was the cache updated since flush
>> + * @batch_cache: Cache sleep and wake requests sent as batch
>>   */
>>  struct rpmh_ctrlr {
>>         struct rsc_drv *drv;
>>         struct list_head cache;
>>         spinlock_t lock;
>>         bool dirty;
>> +       const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];
>
>I'm pretty confused about why the "batch_cache" is separate from the
>normal cache.  As far as I can tell the purpose of the two is the same
>but you have two totally separate code paths and data structures.
>
Due to a hardware limitation, requests made by bus drivers must be set
up in the sleep and wake TCS first before setting up the requests from
other drivers. Bus drivers use batch mode for any and all RPMH
communication. Hence their requests are the only ones in the batch_cache.


>>  };
>>
>>  static struct rpmh_ctrlr rpmh_rsc[RPMH_MAX_CTRLR];
>> @@ -133,6 +140,7 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
>>         struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
>>                                                     msg);
>>         struct completion *compl = rpm_msg->completion;
>> +       atomic_t *wc = rpm_msg->wait_count;
>>
>>         rpm_msg->err = r;
>>
>> @@ -143,8 +151,13 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
>>         kfree(rpm_msg->free);
>>
>>         /* Signal the blocking thread we are done */
>> -       if (compl)
>> -               complete(compl);
>> +       if (!compl)
>> +               return;
>
>The comment above this "if" block no longer applies to the line next
>to it after your patch.  ...but below I suggest you get rid of
>"wait_count", so maybe this part of the patch will go away.
>
>
>> +static int cache_batch(struct rpmh_ctrlr *ctrlr,
>> +                      struct rpmh_request **rpm_msg, int count)
>> +{
>> +       unsigned long flags;
>> +       int ret = 0;
>> +       int index = 0;
>> +       int i;
>> +
>> +       spin_lock_irqsave(&ctrlr->lock, flags);
>> +       while (index < RPMH_MAX_BATCH_CACHE && ctrlr->batch_cache[index])
>> +               index++;
>> +       if (index + count >= RPMH_MAX_BATCH_CACHE) {
>> +               ret = -ENOMEM;
>> +               goto fail;
>> +       }
>> +
>> +       for (i = 0; i < count; i++)
>> +               ctrlr->batch_cache[index + i] = rpm_msg[i];
>> +fail:
>
>Nit: this label is for both failure and normal exit, so call it "exit".
>
>
>> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
>> +
>> +       return ret;
>> +}
>
>As part of my overall confusion about why the batch cache is different
>than the normal one: for the normal use case you still call
>rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
>don't for the batch cache.  I still haven't totally figured out what
>rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
>do it for the batch cache but you do for the other one.
>
>
flush_batch does write to the controller using
rpmh_rsc_write_ctrl_data()

Thanks,
Lina

>> +/**
>> + * rpmh_write_batch: Write multiple sets of RPMH commands and wait for the
>> + * batch to finish.
>> + *
>> + * @dev: the device making the request
>> + * @state: Active/sleep set
>> + * @cmd: The payload data
>> + * @n: The array of count of elements in each batch, 0 terminated.
>> + *
>> + * Write a request to the RSC controller without caching. If the request
>> + * state is ACTIVE, then the requests are treated as completion request
>> + * and sent to the controller immediately. The function waits until all the
>> + * commands are complete. If the request was to SLEEP or WAKE_ONLY, then the
>> + * request is sent as fire-n-forget and no ack is expected.
>> + *
>> + * May sleep. Do not call from atomic contexts for ACTIVE_ONLY requests.
>> + */
>> +int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
>> +                    const struct tcs_cmd *cmd, u32 *n)
>> +{
>> +       struct rpmh_request *rpm_msg[RPMH_MAX_REQ_IN_BATCH] = { NULL };
>> +       DECLARE_COMPLETION_ONSTACK(compl);
>> +       atomic_t wait_count = ATOMIC_INIT(0);
>> +       struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
>> +       int count = 0;
>> +       int ret, i;
>> +
>> +       if (IS_ERR(ctrlr) || !cmd || !n)
>> +               return -EINVAL;
>> +
>> +       while (n[count++] > 0)
>> +               ;
>> +       count--;
>> +       if (!count || count > RPMH_MAX_REQ_IN_BATCH)
>> +               return -EINVAL;
>> +
>> +       for (i = 0; i < count; i++) {
>> +               rpm_msg[i] = __get_rpmh_msg_async(state, cmd, n[i]);
>> +               if (IS_ERR_OR_NULL(rpm_msg[i])) {
>
>Just "IS_ERR".  It's never NULL.
>
>...also add a i-- somewhere in here or you're going to be kfree()ing
>your error value, aren't you?
>
>
>> +                       ret = PTR_ERR(rpm_msg[i]);
>> +                       for (; i >= 0; i--)
>> +                               kfree(rpm_msg[i]->free);
>> +                       return ret;
>> +               }
>> +               cmd += n[i];
>> +       }
>> +
>> +       if (state != RPMH_ACTIVE_ONLY_STATE)
>> +               return cache_batch(ctrlr, rpm_msg, count);
>
>Don't you need to free rpm_msg items in this case?
>
>
>> +
>> +       atomic_set(&wait_count, count);
>> +
>> +       for (i = 0; i < count; i++) {
>> +               rpm_msg[i]->completion = &compl;
>> +               rpm_msg[i]->wait_count = &wait_count;
>> +               ret = rpmh_rsc_send_data(ctrlr->drv, &rpm_msg[i]->msg);
>> +               if (ret) {
>> +                       int j;
>> +
>> +                       pr_err("Error(%d) sending RPMH message addr=%#x\n",
>> +                              ret, rpm_msg[i]->msg.cmds[0].addr);
>> +                       for (j = i; j < count; j++)
>> +                               rpmh_tx_done(&rpm_msg[j]->msg, ret);
>
>You're just using rpmh_tx_done() to free memory?  Note that you'll
>probably do your error handling in this function a favor if you rename
>__get_rpmh_msg_async() to __fill_rpmh_msg() and remove the memory
>allocation from there.  Then you can do one big allocation of the
>whole array in rpmh_write_batch() and then you'll only have one free
>at the end...
>
>
>
>> +                       break;
>
>"break" seems wrong here.  You'll end up waiting for the completion,
>then I guess timing out, then returning -ETIMEDOUT?
>
>
>> +               }
>> +       }
>> +
>> +       ret = wait_for_completion_timeout(&compl, RPMH_TIMEOUT_MS);
>
>The "wait_count" abstraction is confusing and I believe it's not
>needed.  I think you can remove it and change the above to this
>(untested) code:
>
>time_left = RPMH_TIMEOUT_MS;
>for (i = 0; i < count; i++) {
>  time_left = wait_for_completion_timeout(&compl, time_left);
>  if (!time_left)
>    return -ETIMEDOUT;
>}
>
>...specifically completions are additive, so just wait "count" times
>and then the reader doesn't need to learn your new wait_count
>abstraction and try to reason about it.
>
>...and, actually, I argue in other replies that this should't use a
>timeout, so even cleaner:
>
>for (i = 0; i < count; i++)
>  wait_for_completion(&compl);
>
>
>Once you do that, you can also get rid of the need to pre-count "n",
>so all your loops turn into:
>
>for (i = 0; n[i]; i++)
>
>
>I suppose you might want to get rid of "RPMH_MAX_REQ_IN_BATCH" and
>dynamically allocate your array too, but that seems sane.  As per
>above it seems like you should just dynamically allocate a whole array
>of "struct rpmh_request" items at once anyway.
>
>---
>
>> +       return (ret > 0) ? 0 : -ETIMEDOUT;
>> +
>> +}
>> +EXPORT_SYMBOL(rpmh_write_batch);
>
>Perhaps an even simpler thing than taking all my advice above: can't
>you just add a optional completion to rpmh_write_async()?  That would
>just be stuffed into rpm_msg.
>
>Now your batch code would just be a bunch of calls to
>rpmh_write_async() with an equal number of wait_for_completion() calls
>at the end.  Is there a reason that wouldn't work?  You'd get rid of
>_a lot_ of code.
>
>
>-Doug
Douglas Anderson May 15, 2018, 3:52 p.m. UTC | #5
Hi,

On Mon, May 14, 2018 at 12:59 PM, Lina Iyer <ilina@codeaurora.org> wrote:

>>>  /**
>>> @@ -77,12 +82,14 @@ struct rpmh_request {
>>>   * @cache: the list of cached requests
>>>   * @lock: synchronize access to the controller data
>>>   * @dirty: was the cache updated since flush
>>> + * @batch_cache: Cache sleep and wake requests sent as batch
>>>   */
>>>  struct rpmh_ctrlr {
>>>         struct rsc_drv *drv;
>>>         struct list_head cache;
>>>         spinlock_t lock;
>>>         bool dirty;
>>> +       const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];
>>
>>
>> I'm pretty confused about why the "batch_cache" is separate from the
>> normal cache.  As far as I can tell the purpose of the two is the same
>> but you have two totally separate code paths and data structures.
>>
> Due to a hardware limitation, requests made by bus drivers must be set
> up in the sleep and wake TCS first before setting up the requests from
> other drivers. Bus drivers use batch mode for any and all RPMH
> communication. Hence their request are the only ones in the batch_cache.

This is totally not obvious and not commented.  Why not rename to
"priority" instead of "batch"?

If the only requirement is that these come first, that's still no
reason to use totally separate code paths and mechanisms.  These
requests can still use the same data structures / functions and just
be ordered to come first, can't they?  ...or given a boolean
"priority" and you can do two passes through your queue: one to do the
priority ones and one to do the secondary ones.
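
E.g. something like (untested sketch; the "priority" flag, list member and
helper names are all made up):

	list_for_each_entry(req, &ctrlr->cache, list)
		if (req->priority)
			write_req_to_tcs(req);	/* first pass */
	list_for_each_entry(req, &ctrlr->cache, list)
		if (!req->priority)
			write_req_to_tcs(req);	/* second pass */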


>>> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
>>> +
>>> +       return ret;
>>> +}
>>
>>
>> As part of my overall confusion about why the batch cache is different
>> than the normal one: for the normal use case you still call
>> rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
>> don't for the batch cache.  I still haven't totally figured out what
>> rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
>> do it for the batch cache but you do for the other one.
>>
>>
> flush_batch does write to the controller using
> rpmh_rsc_write_ctrl_data()

My confusion is that they happen at different times.  As I understand it:

* For the normal case, you immediately call
rpmh_rsc_write_ctrl_data() and then later do the rest of the work.

* For the batch case, you call both later.

Is there a good reason for this, or is it just an arbitrary
difference?  If there's a good reason, it should be documented.


-Doug
Lina Iyer May 15, 2018, 4:23 p.m. UTC | #6
On Tue, May 15 2018 at 09:52 -0600, Doug Anderson wrote:
>Hi,
>
>On Mon, May 14, 2018 at 12:59 PM, Lina Iyer <ilina@codeaurora.org> wrote:
>
>>>>  /**
>>>> @@ -77,12 +82,14 @@ struct rpmh_request {
>>>>   * @cache: the list of cached requests
>>>>   * @lock: synchronize access to the controller data
>>>>   * @dirty: was the cache updated since flush
>>>> + * @batch_cache: Cache sleep and wake requests sent as batch
>>>>   */
>>>>  struct rpmh_ctrlr {
>>>>         struct rsc_drv *drv;
>>>>         struct list_head cache;
>>>>         spinlock_t lock;
>>>>         bool dirty;
>>>> +       const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];
>>>
>>>
>>> I'm pretty confused about why the "batch_cache" is separate from the
>>> normal cache.  As far as I can tell the purpose of the two is the same
>>> but you have two totally separate code paths and data structures.
>>>
>> Due to a hardware limitation, requests made by bus drivers must be set
>> up in the sleep and wake TCS first before setting up the requests from
>> other drivers. Bus drivers use batch mode for any and all RPMH
>> communication. Hence their request are the only ones in the batch_cache.
>
>This is totally not obvious and not commented.  Why not rename to
>"priority" instead of "batch"?
>
>If the only requirement is that these come first, that's still no
>reason to use totally separate code paths and mechanisms.  These
>requests can still use the same data structures / functions and just
>be ordered to come first, can't they?  ...or given a boolean
>"priority" and you can do two passes through your queue: one to do the
>priority ones and one to do the secondary ones.
>
>
The bus requests have a certain order and cannot be mutated by the
RPMH driver. The same order has to be maintained in the TCS.
Also, the bus requests see quite a bit of churn during the course of a
use case. They are set up and invalidated often.
It is faster to keep them separate and invalidate the whole batch_cache
instead of intertwining them with requests from other drivers and then
figuring out which ones must be invalidated and rebuilt when the next
batch request comes in. Remember that requests may come from any driver
at any time and would therefore get mangled if we used the same data
structure. So to be faster and to avoid having mangled requests in the
TCS, I have kept the data structures separate.

>>>> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
>>>> +
>>>> +       return ret;
>>>> +}
>>>
>>>
>>> As part of my overall confusion about why the batch cache is different
>>> than the normal one: for the normal use case you still call
>>> rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
>>> don't for the batch cache.  I still haven't totally figured out what
>>> rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
>>> do it for the batch cache but you do for the other one.
>>>
>>>
>> flush_batch does write to the controller using
>> rpmh_rsc_write_ctrl_data()
>
>My confusion is that they happen at different times.  As I understand it:
>
>* For the normal case, you immediately calls
>rpmh_rsc_write_ctrl_data() and then later do the rest of the work.
>
>* For the batch case, you call both later.
>
>Is there a good reason for this, or is it just an arbitrary
>difference?  If there's a good reason, it should be documented.
>
>
In both cases, the requests are cached in the rpmh library and are
only sent to the controller when flushed. I am not sure the
work is any different. rpmh_flush() flushes out the batch requests and
then the requests from other drivers.

Thanks,
Lina

Douglas Anderson May 15, 2018, 4:50 p.m. UTC | #7
Hi,

On Tue, May 15, 2018 at 9:23 AM, Lina Iyer <ilina@codeaurora.org> wrote:
> On Tue, May 15 2018 at 09:52 -0600, Doug Anderson wrote:
>>
>> Hi,
>>
>> On Mon, May 14, 2018 at 12:59 PM, Lina Iyer <ilina@codeaurora.org> wrote:
>>
>>>>>  /**
>>>>> @@ -77,12 +82,14 @@ struct rpmh_request {
>>>>>   * @cache: the list of cached requests
>>>>>   * @lock: synchronize access to the controller data
>>>>>   * @dirty: was the cache updated since flush
>>>>> + * @batch_cache: Cache sleep and wake requests sent as batch
>>>>>   */
>>>>>  struct rpmh_ctrlr {
>>>>>         struct rsc_drv *drv;
>>>>>         struct list_head cache;
>>>>>         spinlock_t lock;
>>>>>         bool dirty;
>>>>> +       const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];
>>>>
>>>>
>>>>
>>>> I'm pretty confused about why the "batch_cache" is separate from the
>>>> normal cache.  As far as I can tell the purpose of the two is the same
>>>> but you have two totally separate code paths and data structures.
>>>>
>>> Due to a hardware limitation, requests made by bus drivers must be set
>>> up in the sleep and wake TCS first before setting up the requests from
>>> other drivers. Bus drivers use batch mode for any and all RPMH
>>> communication. Hence their request are the only ones in the batch_cache.
>>
>>
>> This is totally not obvious and not commented.  Why not rename to
>> "priority" instead of "batch"?
>>
>> If the only requirement is that these come first, that's still no
>> reason to use totally separate code paths and mechanisms.  These
>> requests can still use the same data structures / functions and just
>> be ordered to come first, can't they?  ...or given a boolean
>> "priority" and you can do two passes through your queue: one to do the
>> priority ones and one to do the secondary ones.
>>
>>
> The bus requests have a certain order and cannot be mutated by the
> RPMH driver. It has to be maintained in the TCS in the same order.

Please document this requirement in the code.


> Also, the bus requests have quite a churn during the course of an
> usecase. They are setup and invalidated often.
> It is faster to have them separate and invalidate the whole lot of the
> batch_cache instead of intertwining them with requests from other
> drivers and then figuring out which all must be invalidated and rebuilt
> when the next batch requests comes in. Remember, that requests may come
> from any driver any time and therefore will be mangled if we use the
> same data structure. So to be faster and to avoid having mangled requests
> in the TCS, I have kept the data structures separate.

If you need to find a certain group of requests then can't you just
tag them and it's easy to find them?  I'm still not seeing the need
for a whole separate code path and data structure.


>>>>> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
>>>>> +
>>>>> +       return ret;
>>>>> +}
>>>>
>>>>
>>>>
>>>> As part of my overall confusion about why the batch cache is different
>>>> than the normal one: for the normal use case you still call
>>>> rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
>>>> don't for the batch cache.  I still haven't totally figured out what
>>>> rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
>>>> do it for the batch cache but you do for the other one.
>>>>
>>>>
>>> flush_batch does write to the controller using
>>> rpmh_rsc_write_ctrl_data()
>>
>>
>> My confusion is that they happen at different times.  As I understand it:
>>
>> * For the normal case, you immediately calls
>> rpmh_rsc_write_ctrl_data() and then later do the rest of the work.
>>
>> * For the batch case, you call both later.
>>
>> Is there a good reason for this, or is it just an arbitrary
>> difference?  If there's a good reason, it should be documented.
>>
>>
> In both the cases, the requests are cached in the rpmh library and are
> only sent to the controller only when the flushed. I am not sure the
> work is any different. The rpmh_flush() flushes out batch requests and
> then the requests from other drivers.

OK then, here are the code paths I see:

rpmh_write
=> __rpmh_write
   => cache_rpm_request()
   => (state != RPMH_ACTIVE_ONLY_STATE): rpmh_rsc_send_data()

rpmh_write_batch
=> (state != RPMH_ACTIVE_ONLY_STATE): cache_batch()
   => No call to rpmh_rsc_send_data()


Said another way:

* if you call rpmh_write() for something you're going to defer you
will still call cache_rpm_request() _before_ rpmh_write() returns.

* if you call rpmh_write_batch() for something you're going to defer
then you _don't_ call cache_rpm_request() before rpmh_write_batch()
returns.


Do you find a fault in my analysis of the current code?  If you see a
fault then please point it out.  If you don't see a fault, please say
why the behaviors are different.  I certainly understand that
eventually you will call cache_rpm_request() for both cases.  It's
just that in one case the call happens right away and the other case
it is deferred.
Lina Iyer May 15, 2018, 6:03 p.m. UTC | #8
On Tue, May 15 2018 at 10:50 -0600, Doug Anderson wrote:
>Hi,
>
>On Tue, May 15, 2018 at 9:23 AM, Lina Iyer <ilina@codeaurora.org> wrote:
>> On Tue, May 15 2018 at 09:52 -0600, Doug Anderson wrote:
>>>
>>> Hi,
>>>
>>> On Mon, May 14, 2018 at 12:59 PM, Lina Iyer <ilina@codeaurora.org> wrote:
>>>
>>>>>>  /**
>>>>>> @@ -77,12 +82,14 @@ struct rpmh_request {
>>>>>>   * @cache: the list of cached requests
>>>>>>   * @lock: synchronize access to the controller data
>>>>>>   * @dirty: was the cache updated since flush
>>>>>> + * @batch_cache: Cache sleep and wake requests sent as batch
>>>>>>   */
>>>>>>  struct rpmh_ctrlr {
>>>>>>         struct rsc_drv *drv;
>>>>>>         struct list_head cache;
>>>>>>         spinlock_t lock;
>>>>>>         bool dirty;
>>>>>> +       const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];
>>>>>
>>>>>
>>>>>
>>>>> I'm pretty confused about why the "batch_cache" is separate from the
>>>>> normal cache.  As far as I can tell the purpose of the two is the same
>>>>> but you have two totally separate code paths and data structures.
>>>>>
>>>> Due to a hardware limitation, requests made by bus drivers must be set
>>>> up in the sleep and wake TCS first before setting up the requests from
>>>> other drivers. Bus drivers use batch mode for any and all RPMH
>>>> communication. Hence their request are the only ones in the batch_cache.
>>>
>>>
>>> This is totally not obvious and not commented.  Why not rename to
>>> "priority" instead of "batch"?
>>>
>>> If the only requirement is that these come first, that's still no
>>> reason to use totally separate code paths and mechanisms.  These
>>> requests can still use the same data structures / functions and just
>>> be ordered to come first, can't they?  ...or given a boolean
>>> "priority" and you can do two passes through your queue: one to do the
>>> priority ones and one to do the secondary ones.
>>>
>>>
>> The bus requests have a certain order and cannot be mutated by the
>> RPMH driver. It has to be maintained in the TCS in the same order.
>
>Please document this requirement in the code.
>
>
OK.

>> Also, the bus requests have quite a churn during the course of an
>> usecase. They are setup and invalidated often.
>> It is faster to have them separate and invalidate the whole lot of the
>> batch_cache instead of intertwining them with requests from other
>> drivers and then figuring out which all must be invalidated and rebuilt
>> when the next batch requests comes in. Remember, that requests may come
>> from any driver any time and therefore will be mangled if we use the
>> same data structure. So to be faster and to avoid having mangled requests
>> in the TCS, I have kept the data structures separate.
>
>If you need to find a certain group of requests then can't you just
>tag them and it's easy to find them?  I'm still not seeing the need
>for a whole separate code path and data structure.
>
Could be done but it will be slower and involve a lot more code than
separate data structures.

>>>>>> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
>>>>>> +
>>>>>> +       return ret;
>>>>>> +}
>>>>>
>>>>>
>>>>>
>>>>> As part of my overall confusion about why the batch cache is different
>>>>> than the normal one: for the normal use case you still call
>>>>> rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
>>>>> don't for the batch cache.  I still haven't totally figured out what
>>>>> rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
>>>>> do it for the batch cache but you do for the other one.
>>>>>
>>>>>
>>>> flush_batch does write to the controller using
>>>> rpmh_rsc_write_ctrl_data()
>>>
>>>
>>> My confusion is that they happen at different times.  As I understand it:
>>>
>>> * For the normal case, you immediately calls
>>> rpmh_rsc_write_ctrl_data() and then later do the rest of the work.
>>>
>>> * For the batch case, you call both later.
>>>
>>> Is there a good reason for this, or is it just an arbitrary
>>> difference?  If there's a good reason, it should be documented.
>>>
>>>
>> In both the cases, the requests are cached in the rpmh library and are
>> only sent to the controller only when the flushed. I am not sure the
>> work is any different. The rpmh_flush() flushes out batch requests and
>> then the requests from other drivers.
>
>OK then, here are the code paths I see:
>
>rpmh_write
>=> __rpmh_write
>   => cache_rpm_request()
>   => (state != RPMH_ACTIVE_ONLY_STATE): rpmh_rsc_send_data()
>
>rpmh_write_batch
>=> (state != RPMH_ACTIVE_ONLY_STATE): cache_batch()
>   => No call to rpmh_rsc_send_data()
>
>
>Said another way:
>
>* if you call rpmh_write() for something you're going to defer you
>will still call cache_rpm_request() _before_ rpmh_write() returns.
>
>* if you call rpmh_write_batch() for something you're going to defer
>then you _don't_ call cache_rpm_request() before rpmh_write_batch()
>returns.
>
>
>Do you find a fault in my analysis of the current code?  If you see a
>fault then please point it out.  If you don't see a fault, please say
>why the behaviors are different.  I certainly understand that
>eventually you will call cache_rpm_request() for both cases.  It's
>just that in one case the call happens right away and the other case
>it is deferred.
True. I see where your confusion is. It is because of an optimization and
our existential need to save power at every opportunity.

For the rpmh_write() path -
The reason for the disparity is that we can vote for a system low
power mode (modes where we flush the caches and put the entire silicon
in a low power state) at any point when all the CPUs are idle. If a
driver has updated its vote for a system resource, it needs to be
updated in the cache and that's what cache_rpm_request() does. It's
possible that we may enter a system low power mode immediately after
that. The rpmh_flush() would be called by the idle driver and we would
flush the cached values and enter the idle state. By writing immediately
to the TCS, we avoid increasing the latency of entering an idle state.

For the rpmh_write_batch() path -
The bus driver would invalidate the TCS and the batch_cache. The use
case fills up the batch_cache with values as needed. During the use
case, the CPUs can go idle and the idle drivers would call rpmh_flush().
At that time, we would write the batch_cache into the already
invalidated TCSes and then write the rest of the cached requests from
->cache. We then enter the low power modes.

The optimization of writing the sleep/wake votes in the same context as
the driver making the request helps us avoid writing some extra
registers in the critical idle path.

Hope this helps.

-- Lina



Douglas Anderson May 15, 2018, 7:52 p.m. UTC | #9
Hi,

On Tue, May 15, 2018 at 11:03 AM, Lina Iyer <ilina@codeaurora.org> wrote:
>>> Also, the bus requests have quite a churn during the course of an
>>> usecase. They are setup and invalidated often.
>>> It is faster to have them separate and invalidate the whole lot of the
>>> batch_cache instead of intertwining them with requests from other
>>> drivers and then figuring out which all must be invalidated and rebuilt
>>> when the next batch requests comes in. Remember, that requests may come
>>> from any driver any time and therefore will be mangled if we use the
>>> same data structure. So to be faster and to avoid having mangled requests
>>> in the TCS, I have kept the data structures separate.
>>
>>
>> If you need to find a certain group of requests then can't you just
>> tag them and it's easy to find them?  I'm still not seeing the need
>> for a whole separate code path and data structure.
>>
> Could be done but it will be slower and involve a lot more code than
> separate data structures.

When you say "a lot more code", you mean that you'll have to write a
lot more code or that each request will execute a lot more code?  I
would argue that very little code would need to be added (compared to
your patch) if rpmh_write_batch() was implemented atop
rpmh_write_async().  Even with extra tagging / prioritization it would
be small.  I could try to prototype it I guess.

I think you mean that it would _execute_ a lot more code and thus be
slower.  Is that right?  Is your main argument here that statically
pre-allocating 20 slots in an array is going to be a lot faster than
kzalloc-ing 20 linked list nodes and chaining them together?  If
you're truly worried about the kzalloc calls being slow, I suppose you
could always allocate them with a kmem_cache.  ...but we're talking
fewer than 20 kzalloc calls, right?
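
For reference, a kmem_cache for these would be something like (untested
sketch):

	static struct kmem_cache *rpmh_req_cache;

	rpmh_req_cache = KMEM_CACHE(rpmh_request, 0);
	...
	rpm_msg = kmem_cache_zalloc(rpmh_req_cache, GFP_ATOMIC);
	...
	kmem_cache_free(rpmh_req_cache, rpm_msg);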

I'll also make an argument (with no data to back me up) that
maintaining separate functions for handling the batch cases will slow
down your kernel because your footprint in the icache will be bigger
and you'll be more likely to kick something else out that actually
needs to run fast.


>>>>>>> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
>>>>>>> +
>>>>>>> +       return ret;
>>>>>>> +}
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> As part of my overall confusion about why the batch cache is different
>>>>>> than the normal one: for the normal use case you still call
>>>>>> rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
>>>>>> don't for the batch cache.  I still haven't totally figured out what
>>>>>> rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
>>>>>> do it for the batch cache but you do for the other one.
>>>>>>
>>>>>>
>>>>> flush_batch does write to the controller using
>>>>> rpmh_rsc_write_ctrl_data()
>>>>
>>>>
>>>>
>>>> My confusion is that they happen at different times.  As I understand
>>>> it:
>>>>
>>>> * For the normal case, you immediately calls
>>>> rpmh_rsc_write_ctrl_data() and then later do the rest of the work.
>>>>
>>>> * For the batch case, you call both later.
>>>>
>>>> Is there a good reason for this, or is it just an arbitrary
>>>> difference?  If there's a good reason, it should be documented.
>>>>
>>>>
>>> In both the cases, the requests are cached in the rpmh library and are
>>> only sent to the controller only when the flushed. I am not sure the
>>> work is any different. The rpmh_flush() flushes out batch requests and
>>> then the requests from other drivers.
>>
>>
>> OK then, here are the code paths I see:
>>
>> rpmh_write
>> => __rpmh_write
>>   => cache_rpm_request()
>>   => (state != RPMH_ACTIVE_ONLY_STATE): rpmh_rsc_send_data()
>>
>> rpmh_write_batch
>> => (state != RPMH_ACTIVE_ONLY_STATE): cache_batch()
>>   => No call to rpmh_rsc_send_data()
>>
>>
>> Said another way:
>>
>> * if you call rpmh_write() for something you're going to defer you
>> will still call cache_rpm_request() _before_ rpmh_write() returns.
>>
>> * if you call rpmh_write_batch() for something you're going to defer
>> then you _don't_ call cache_rpm_request() before rpmh_write_batch()
>> returns.
>>
>>
>> Do you find a fault in my analysis of the current code?  If you see a
>> fault then please point it out.  If you don't see a fault, please say
>> why the behaviors are different.  I certainly understand that
>> eventually you will call cache_rpm_request() for both cases.  It's
>> just that in one case the call happens right away and the other case
>> it is deferred.
>
> True. I see where your confusion is. It is because of an optimization and
> our existential need for saving power at every opportunity.
>
> For rpmh_write path -
> The reason being for the disparity is that, we can vote for a system low
> power mode (modes where we flush the caches and put the entire silicon
> is a low power state) at any point when all the CPUs are idle. If a
> driver has updated its vote for a system resource, it needs to be
> updated in the cache and thats what cache_rpm_request() does. Its
> possible that we may enter a system low power mode immediately after
> that. The rpmh_flush() would be called by the idle driver and we would
> flush the cached values and enter the idle state. By writing immediately
> to the TCS, we avoid increasing the latency of entering an idle state.
>
> For the rpmh_write_batch() path -
> The Bus driver would invalidate the TCS and the batch_cache. The use
> case fills up the batch_cache with values as needed by the use case.
> While during the usecase, the CPUs can go idle and the idle drivers
> would call rpmh_flush(). At that time, we would write the batch_cache
> into the already invalidated TCSes and then write the rest of the cached
> requests from ->cache. We then enter the low power modes.
>
> The optimization of writing the sleep/wake votes in the same context of
> the driver making the request, helps us avoid writing some extra
> registers in the critical idle path.

I don't think I followed all that, but I'll try to read deeper into
RPMh.  ...but one question: could the two paths be changed to be the
same?  AKA would it be bad to call rpmh_rsc_send_data() synchronously
in rpmh_write_batch()?

If the two cases really need to behave differently then it should be
documented in the code.


-Doug
Raju P.L.S.S.S.N May 23, 2018, 1:27 p.m. UTC | #10
Hi,

I will reply on the points other than those Lina has already responded to.

On 5/12/2018 1:49 AM, Doug Anderson wrote:
> Hi,
> 
> On Wed, May 9, 2018 at 10:01 AM, Lina Iyer <ilina@codeaurora.org> wrote:
>>   /**
>> @@ -77,12 +82,14 @@ struct rpmh_request {
>>    * @cache: the list of cached requests
>>    * @lock: synchronize access to the controller data
>>    * @dirty: was the cache updated since flush
>> + * @batch_cache: Cache sleep and wake requests sent as batch
>>    */
>>   struct rpmh_ctrlr {
>>          struct rsc_drv *drv;
>>          struct list_head cache;
>>          spinlock_t lock;
>>          bool dirty;
>> +       const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];
> 
> I'm pretty confused about why the "batch_cache" is separate from the
> normal cache.  As far as I can tell the purpose of the two is the same
> but you have two totally separate code paths and data structures.
> 
> 
>>   };
>>
>>   static struct rpmh_ctrlr rpmh_rsc[RPMH_MAX_CTRLR];
>> @@ -133,6 +140,7 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
>>          struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
>>                                                      msg);
>>          struct completion *compl = rpm_msg->completion;
>> +       atomic_t *wc = rpm_msg->wait_count;
>>
>>          rpm_msg->err = r;
>>
>> @@ -143,8 +151,13 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
>>          kfree(rpm_msg->free);
>>
>>          /* Signal the blocking thread we are done */
>> -       if (compl)
>> -               complete(compl);
>> +       if (!compl)
>> +               return;
> 
> The comment above this "if" block no longer applies to the line next
> to it after your patch.  ...but below I suggest you get rid of
> "wait_count", so maybe this part of the patch will go away.
> 
> 
>> +static int cache_batch(struct rpmh_ctrlr *ctrlr,
>> +                      struct rpmh_request **rpm_msg, int count)
>> +{
>> +       unsigned long flags;
>> +       int ret = 0;
>> +       int index = 0;
>> +       int i;
>> +
>> +       spin_lock_irqsave(&ctrlr->lock, flags);
>> +       while (index < RPMH_MAX_BATCH_CACHE && ctrlr->batch_cache[index])
>> +               index++;
>> +       if (index + count >= RPMH_MAX_BATCH_CACHE) {
>> +               ret = -ENOMEM;
>> +               goto fail;
>> +       }
>> +
>> +       for (i = 0; i < count; i++)
>> +               ctrlr->batch_cache[index + i] = rpm_msg[i];
>> +fail:
> 
> Nit: this label is for both failure and normal exit, so call it "exit".
> 
> 
>> +       spin_unlock_irqrestore(&ctrlr->lock, flags);
>> +
>> +       return ret;
>> +}
> 
> As part of my overall confusion about why the batch cache is different
> than the normal one: for the normal use case you still call
> rpmh_rsc_write_ctrl_data() for things you put in your cache, but you
> don't for the batch cache.  I still haven't totally figured out what
> rpmh_rsc_write_ctrl_data() does, but it seems strange that you don't
> do it for the batch cache but you do for the other one.
> 
> 
>> +/**
>> + * rpmh_write_batch: Write multiple sets of RPMH commands and wait for the
>> + * batch to finish.
>> + *
>> + * @dev: the device making the request
>> + * @state: Active/sleep set
>> + * @cmd: The payload data
>> + * @n: The array of count of elements in each batch, 0 terminated.
>> + *
>> + * Write a request to the RSC controller without caching. If the request
>> + * state is ACTIVE, then the requests are treated as completion request
>> + * and sent to the controller immediately. The function waits until all the
>> + * commands are complete. If the request was to SLEEP or WAKE_ONLY, then the
>> + * request is sent as fire-n-forget and no ack is expected.
>> + *
>> + * May sleep. Do not call from atomic contexts for ACTIVE_ONLY requests.
>> + */
>> +int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
>> +                    const struct tcs_cmd *cmd, u32 *n)
>> +{
>> +       struct rpmh_request *rpm_msg[RPMH_MAX_REQ_IN_BATCH] = { NULL };
>> +       DECLARE_COMPLETION_ONSTACK(compl);
>> +       atomic_t wait_count = ATOMIC_INIT(0);
>> +       struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
>> +       int count = 0;
>> +       int ret, i;
>> +
>> +       if (IS_ERR(ctrlr) || !cmd || !n)
>> +               return -EINVAL;
>> +
>> +       while (n[count++] > 0)
>> +               ;
>> +       count--;
>> +       if (!count || count > RPMH_MAX_REQ_IN_BATCH)
>> +               return -EINVAL;
>> +
>> +       for (i = 0; i < count; i++) {
>> +               rpm_msg[i] = __get_rpmh_msg_async(state, cmd, n[i]);
>> +               if (IS_ERR_OR_NULL(rpm_msg[i])) {
> 
> Just "IS_ERR".  It's never NULL.
> ...also add a i-- somewhere in here or you're going to be kfree()ing
> your error value, aren't you?

Sure. Will make the change in the next patch.

> 
> 
>> +                       ret = PTR_ERR(rpm_msg[i]);
>> +                       for (; i >= 0; i--)
>> +                               kfree(rpm_msg[i]->free);
>> +                       return ret;
>> +               }
>> +               cmd += n[i];
>> +       }
>> +
>> +       if (state != RPMH_ACTIVE_ONLY_STATE)
>> +               return cache_batch(ctrlr, rpm_msg, count);
> 
> Don't you need to free rpm_msg items in this case?
> 
> 
>> +
>> +       atomic_set(&wait_count, count);
>> +
>> +       for (i = 0; i < count; i++) {
>> +               rpm_msg[i]->completion = &compl;
>> +               rpm_msg[i]->wait_count = &wait_count;
>> +               ret = rpmh_rsc_send_data(ctrlr->drv, &rpm_msg[i]->msg);
>> +               if (ret) {
>> +                       int j;
>> +
>> +                       pr_err("Error(%d) sending RPMH message addr=%#x\n",
>> +                              ret, rpm_msg[i]->msg.cmds[0].addr);
>> +                       for (j = i; j < count; j++)
>> +                               rpmh_tx_done(&rpm_msg[j]->msg, ret);
> 
> You're just using rpmh_tx_done() to free memory?  Note that you'll
> probably do your error handling in this function a favor if you rename
> __get_rpmh_msg_async() to __fill_rpmh_msg() and remove the memory
> allocation from there.  Then you can do one big allocation of the
> whole array in rpmh_write_batch() and then you'll only have one free
> at the end...
> 
> 
> 
>> +                       break;
> 
> "break" seems wrong here.  You'll end up waiting for the completion,
> then I guess timing out, then returning -ETIMEDOUT?

As the loop above the break calls rpmh_tx_done() for the remaining entries,
the completion will be signaled, so there will be no waiting.

Thanks,
Raju

> 
> 
>> +               }
>> +       }
>> +
>> +       ret = wait_for_completion_timeout(&compl, RPMH_TIMEOUT_MS);
> 
> The "wait_count" abstraction is confusing and I believe it's not
> needed.  I think you can remove it and change the above to this
> (untested) code:
> 
> time_left = RPMH_TIMEOUT_MS;
> for (i = 0; i < count; i++) {
>    time_left = wait_for_completion_timeout(&compl, time_left);
>    if (!time_left)
>      return -ETIMEDOUT;
> }
> 
> ...specifically completions are additive, so just wait "count" times
> and then the reader doesn't need to learn your new wait_count
> abstraction and try to reason about it.
> 
> ...and, actually, I argue in other replies that this should't use a
> timeout, so even cleaner:
> 
> for (i = 0; i < count; i++)
>    wait_for_completion(&compl);
> 
> 
> Once you do that, you can also get rid of the need to pre-count "n",
> so all your loops turn into:
> 
> for (i = 0; n[i]; i++)
> 
> 
> I suppose you might want to get rid of "RPMH_MAX_REQ_IN_BATCH" and
> dynamically allocate your array too, but that seems sane.  As per
> above it seems like you should just dynamically allocate a whole array
> of "struct rpmh_request" items at once anyway.
> 
> ---
> 
>> +       return (ret > 0) ? 0 : -ETIMEDOUT;
>> +
>> +}
>> +EXPORT_SYMBOL(rpmh_write_batch);
> 
> Perhaps an even simpler thing than taking all my advice above: can't
> you just add a optional completion to rpmh_write_async()?  That would
> just be stuffed into rpm_msg.
> 
> Now your batch code would just be a bunch of calls to
> rpmh_write_async() with an equal number of wait_for_completion() calls
> at the end.  Is there a reason that wouldn't work?  You'd get rid of
> _a lot_ of code.
> 
> 
> -Doug
> 
Douglas Anderson May 30, 2018, 9:48 p.m. UTC | #11
Hi,

On Wed, May 23, 2018 at 6:27 AM, Raju P L S S S N
<rplsssn@codeaurora.org> wrote:
>>> +                       break;
>>
>>
>> "break" seems wrong here.  You'll end up waiting for the completion,
>> then I guess timing out, then returning -ETIMEDOUT?
>
>
> The loop just above the break calls rpmh_tx_done() for the failed request
> and all the remaining ones, so the completion is still signaled and the
> wait below does not block.

Ah, I see.  I missed that.

...oh, I guess you need to wait because any requests you might have
made will try to signal your completion which is on the stack...
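
As a rough sketch of the simpler alternative raised earlier in the thread,
assuming rpmh_write_async() grew an optional completion argument (the extra
parameter is hypothetical and not in this patch), the active-only batch path
could reduce to a loop of async writes plus matching waits:

	DECLARE_COMPLETION_ONSTACK(compl);
	int ret = 0;
	int i, sent;

	for (sent = 0; n[sent]; sent++) {
		/* hypothetical variant of rpmh_write_async() taking a completion */
		ret = rpmh_write_async(dev, state, cmd, n[sent], &compl);
		if (ret)
			break;
		cmd += n[sent];
	}

	/*
	 * Completions are additive: wait once per request that was actually
	 * submitted, since each tx_done signals the on-stack completion.
	 */
	for (i = 0; i < sent; i++)
		wait_for_completion(&compl);

	return ret;
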

Patch

diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
index 1bb62876795c..a0e277b4b846 100644
--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -21,6 +21,8 @@ 
 #include "rpmh-internal.h"
 
 #define RPMH_TIMEOUT_MS			msecs_to_jiffies(10000)
+#define RPMH_MAX_REQ_IN_BATCH		10
+#define RPMH_MAX_BATCH_CACHE		(2 * RPMH_MAX_REQ_IN_BATCH)
 
 #define DEFINE_RPMH_MSG_ONSTACK(dev, s, q, name)	\
 	struct rpmh_request name = {			\
@@ -34,6 +36,7 @@ 
 		.completion = q,			\
 		.dev = dev,				\
 		.free = NULL,				\
+		.wait_count = NULL,			\
 	}
 
 /**
@@ -60,6 +63,7 @@  struct cache_req {
  * @dev: the device making the request
  * @err: err return from the controller
  * @free: the request object to be freed at tx_done
+ * @wait_count: count of waiters for this completion
  */
 struct rpmh_request {
 	struct tcs_request msg;
@@ -68,6 +72,7 @@  struct rpmh_request {
 	const struct device *dev;
 	int err;
 	struct rpmh_request *free;
+	atomic_t *wait_count;
 };
 
 /**
@@ -77,12 +82,14 @@  struct rpmh_request {
  * @cache: the list of cached requests
  * @lock: synchronize access to the controller data
  * @dirty: was the cache updated since flush
+ * @batch_cache: Cache sleep and wake requests sent as batch
  */
 struct rpmh_ctrlr {
 	struct rsc_drv *drv;
 	struct list_head cache;
 	spinlock_t lock;
 	bool dirty;
+	const struct rpmh_request *batch_cache[RPMH_MAX_BATCH_CACHE];
 };
 
 static struct rpmh_ctrlr rpmh_rsc[RPMH_MAX_CTRLR];
@@ -133,6 +140,7 @@  void rpmh_tx_done(const struct tcs_request *msg, int r)
 	struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
 						    msg);
 	struct completion *compl = rpm_msg->completion;
+	atomic_t *wc = rpm_msg->wait_count;
 
 	rpm_msg->err = r;
 
@@ -143,8 +151,13 @@  void rpmh_tx_done(const struct tcs_request *msg, int r)
 	kfree(rpm_msg->free);
 
 	/* Signal the blocking thread we are done */
-	if (compl)
-		complete(compl);
+	if (!compl)
+		return;
+
+	if (wc && !atomic_dec_and_test(wc))
+		return;
+
+	complete(compl);
 }
 EXPORT_SYMBOL(rpmh_tx_done);
 
@@ -332,6 +345,137 @@  int rpmh_write(const struct device *dev, enum rpmh_state state,
 }
 EXPORT_SYMBOL(rpmh_write);
 
+static int cache_batch(struct rpmh_ctrlr *ctrlr,
+		       struct rpmh_request **rpm_msg, int count)
+{
+	unsigned long flags;
+	int ret = 0;
+	int index = 0;
+	int i;
+
+	spin_lock_irqsave(&ctrlr->lock, flags);
+	while (index < RPMH_MAX_BATCH_CACHE && ctrlr->batch_cache[index])
+		index++;
+	if (index + count >= RPMH_MAX_BATCH_CACHE) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	for (i = 0; i < count; i++)
+		ctrlr->batch_cache[index + i] = rpm_msg[i];
+fail:
+	spin_unlock_irqrestore(&ctrlr->lock, flags);
+
+	return ret;
+}
+
+static int flush_batch(struct rpmh_ctrlr *ctrlr)
+{
+	const struct rpmh_request *rpm_msg;
+	unsigned long flags;
+	int ret = 0;
+	int i;
+
+	/* Send Sleep/Wake requests to the controller, expect no response */
+	spin_lock_irqsave(&ctrlr->lock, flags);
+	for (i = 0; ctrlr->batch_cache[i]; i++) {
+		rpm_msg = ctrlr->batch_cache[i];
+		ret = rpmh_rsc_write_ctrl_data(ctrlr->drv, &rpm_msg->msg);
+		if (ret)
+			break;
+	}
+	spin_unlock_irqrestore(&ctrlr->lock, flags);
+
+	return ret;
+}
+
+static void invalidate_batch(struct rpmh_ctrlr *ctrlr)
+{
+	unsigned long flags;
+	int index = 0;
+
+	spin_lock_irqsave(&ctrlr->lock, flags);
+	while (ctrlr->batch_cache[index]) {
+		kfree(ctrlr->batch_cache[index]->free);
+		ctrlr->batch_cache[index] = NULL;
+		index++;
+	}
+	spin_unlock_irqrestore(&ctrlr->lock, flags);
+}
+
+/**
+ * rpmh_write_batch: Write multiple sets of RPMH commands and wait for the
+ * batch to finish.
+ *
+ * @dev: the device making the request
+ * @state: Active/sleep set
+ * @cmd: The payload data
+ * @n: The array of count of elements in each batch, 0 terminated.
+ *
+ * Write a request to the RSC controller without caching. If the request
+ * state is ACTIVE, then the requests are treated as completion request
+ * and sent to the controller immediately. The function waits until all the
+ * commands are complete. If the request was to SLEEP or WAKE_ONLY, then the
+ * request is sent as fire-n-forget and no ack is expected.
+ *
+ * May sleep. Do not call from atomic contexts for ACTIVE_ONLY requests.
+ */
+int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
+		     const struct tcs_cmd *cmd, u32 *n)
+{
+	struct rpmh_request *rpm_msg[RPMH_MAX_REQ_IN_BATCH] = { NULL };
+	DECLARE_COMPLETION_ONSTACK(compl);
+	atomic_t wait_count = ATOMIC_INIT(0);
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+	int count = 0;
+	int ret, i;
+
+	if (IS_ERR(ctrlr) || !cmd || !n)
+		return -EINVAL;
+
+	while (n[count++] > 0)
+		;
+	count--;
+	if (!count || count > RPMH_MAX_REQ_IN_BATCH)
+		return -EINVAL;
+
+	for (i = 0; i < count; i++) {
+		rpm_msg[i] = __get_rpmh_msg_async(state, cmd, n[i]);
+		if (IS_ERR_OR_NULL(rpm_msg[i])) {
+			ret = PTR_ERR(rpm_msg[i]);
+			for (; i >= 0; i--)
+				kfree(rpm_msg[i]->free);
+			return ret;
+		}
+		cmd += n[i];
+	}
+
+	if (state != RPMH_ACTIVE_ONLY_STATE)
+		return cache_batch(ctrlr, rpm_msg, count);
+
+	atomic_set(&wait_count, count);
+
+	for (i = 0; i < count; i++) {
+		rpm_msg[i]->completion = &compl;
+		rpm_msg[i]->wait_count = &wait_count;
+		ret = rpmh_rsc_send_data(ctrlr->drv, &rpm_msg[i]->msg);
+		if (ret) {
+			int j;
+
+			pr_err("Error(%d) sending RPMH message addr=%#x\n",
+			       ret, rpm_msg[i]->msg.cmds[0].addr);
+			for (j = i; j < count; j++)
+				rpmh_tx_done(&rpm_msg[j]->msg, ret);
+			break;
+		}
+	}
+
+	ret = wait_for_completion_timeout(&compl, RPMH_TIMEOUT_MS);
+	return (ret > 0) ? 0 : -ETIMEDOUT;
+
+}
+EXPORT_SYMBOL(rpmh_write_batch);
+
 static int is_req_valid(struct cache_req *req)
 {
 	return (req->sleep_val != UINT_MAX &&
@@ -383,6 +527,11 @@  int rpmh_flush(const struct device *dev)
 		return 0;
 	}
 
+	/* First flush the cached batch requests */
+	ret = flush_batch(ctrlr);
+	if (ret)
+		return ret;
+
 	/*
 	 * Nobody else should be calling this function other than system PM,
 	 * hence we can run without locks.
@@ -424,6 +573,7 @@  int rpmh_invalidate(const struct device *dev)
 	if (IS_ERR(ctrlr))
 		return PTR_ERR(ctrlr);
 
+	invalidate_batch(ctrlr);
 	ctrlr->dirty = true;
 
 	do {
diff --git a/include/soc/qcom/rpmh.h b/include/soc/qcom/rpmh.h
index 1161a5c77e75..619e07c75da9 100644
--- a/include/soc/qcom/rpmh.h
+++ b/include/soc/qcom/rpmh.h
@@ -17,6 +17,9 @@  int rpmh_write(const struct device *dev, enum rpmh_state state,
 int rpmh_write_async(const struct device *dev, enum rpmh_state state,
 		     const struct tcs_cmd *cmd, u32 n);
 
+int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
+		     const struct tcs_cmd *cmd, u32 *n);
+
 int rpmh_flush(const struct device *dev);
 
 int rpmh_invalidate(const struct device *dev);
@@ -32,6 +35,11 @@  static inline int rpmh_write_async(const struct device *dev,
 				   const struct tcs_cmd *cmd, u32 n)
 { return -ENODEV; }
 
+static inline int rpmh_write_batch(const struct device *dev,
+				   enum rpmh_state state,
+				   const struct tcs_cmd *cmd, u32 *n)
+{ return -ENODEV; }
+
 static inline int rpmh_flush(const struct device *dev)
 { return -ENODEV; }