[v4,08/12] xen/spinlock: add another function level

Message ID 20231212094725.22184-9-jgross@suse.com (mailing list archive)
State New
Series xen/spinlock: make recursive spinlocks a dedicated type

Commit Message

Jürgen Groß Dec. 12, 2023, 9:47 a.m. UTC
Add another function level in spinlock.c, hiding the spinlock_t layout
from the low-level locking code.

This is done in preparation for introducing rspinlock_t for recursive
locks without having to duplicate all of the locking code.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch
---
 xen/common/spinlock.c      | 104 +++++++++++++++++++++++--------------
 xen/include/xen/spinlock.h |   1 +
 2 files changed, 65 insertions(+), 40 deletions(-)
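
In outline, the patch turns each public locking function into a thin
wrapper around a layout-agnostic helper that takes the individual
spinlock_t fields instead of the whole struct. Simplified from the patch
at the bottom of this page (helper body elided):

    /* Common helper: operates on the fields, not on spinlock_t itself. */
    static void always_inline spin_lock_common(spinlock_tickets_t *t,
                                               union lock_debug *debug,
                                               struct lock_profile *profile,
                                               void (*cb)(void *data),
                                               void *data);

    /* The public function merely decomposes the lock into its fields. */
    void _spin_lock(spinlock_t *lock)
    {
        spin_lock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR,
                         NULL, NULL);
    }

The same pattern later lets the rspin_* functions reuse these helpers, as
rspinlock_t carries the same tickets/debug/profile fields.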

Comments

Julien Grall Dec. 12, 2023, 7:10 p.m. UTC | #1
Hi Juergen,

On 12/12/2023 09:47, Juergen Gross wrote:
> Add another function level in spinlock.c, hiding the spinlock_t layout
> from the low-level locking code.
> 
> This is done in preparation for introducing rspinlock_t for recursive
> locks without having to duplicate all of the locking code.

So all the fields you pass are the ones from spinlock.

Looking at pahole after this series is applied, we have:

==== Debug + Lock profile ====

struct spinlock {
         spinlock_tickets_t         tickets;              /*     0     4 */
         union lock_debug           debug;                /*     4     4 */
         struct lock_profile *      profile;              /*     8     8 */

         /* size: 16, cachelines: 1, members: 3 */
         /* last cacheline: 16 bytes */
};
struct rspinlock {
         spinlock_tickets_t         tickets;              /*     0     4 */
         uint16_t                   recurse_cpu;          /*     4     2 */
         uint8_t                    recurse_cnt;          /*     6     1 */

         /* XXX 1 byte hole, try to pack */

         union lock_debug           debug;                /*     8     4 */

         /* XXX 4 bytes hole, try to pack */

         struct lock_profile *      profile;              /*    16     8 */

         /* size: 24, cachelines: 1, members: 5 */
         /* sum members: 19, holes: 2, sum holes: 5 */
         /* last cacheline: 24 bytes */
};


==== Debug ====

struct spinlock {
         spinlock_tickets_t         tickets;              /*     0     4 */
         union lock_debug           debug;                /*     4     4 */

         /* size: 8, cachelines: 1, members: 2 */
         /* last cacheline: 8 bytes */
};
struct rspinlock {
         spinlock_tickets_t         tickets;              /*     0     4 */
         uint16_t                   recurse_cpu;          /*     4     2 */
         uint8_t                    recurse_cnt;          /*     6     1 */

         /* XXX 1 byte hole, try to pack */

         union lock_debug           debug;                /*     8     4 */

         /* size: 12, cachelines: 1, members: 4 */
         /* sum members: 11, holes: 1, sum holes: 1 */
         /* last cacheline: 12 bytes */
};

==== Prod ====

struct spinlock {
         spinlock_tickets_t         tickets;              /*     0     4 */
         union lock_debug           debug;                /*     4     0 */

         /* size: 4, cachelines: 1, members: 2 */
         /* last cacheline: 4 bytes */
};
struct rspinlock {
         spinlock_tickets_t         tickets;              /*     0     4 */
         uint16_t                   recurse_cpu;          /*     4     2 */
         uint8_t                    recurse_cnt;          /*     6     1 */
         union lock_debug           debug;                /*     7     0 */

         /* size: 8, cachelines: 1, members: 4 */
         /* padding: 1 */
         /* last cacheline: 8 bytes */
};
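
(Note the zero-sized debug members in the prod layouts: union lock_debug
is presumably empty without the debug config option, along the lines of

    union lock_debug { };

which is why spinlock_t collapses to just the 4-byte ticket pair there.)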


I think we could embed spinlock_t in rspinlock_t without increasing the
size of rspinlock_t. Have you considered it? This could reduce the number
of function levels introduced in this series.
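
Something along these lines (an untested sketch; the exact layout is of
course up for discussion, with the embedded member named "lock"):

    typedef struct rspinlock {
        spinlock_t lock;          /* tickets + debug + profile, as today */
        uint16_t   recurse_cpu;
        uint8_t    recurse_cnt;
    } rspinlock_t;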

Cheers,
Jürgen Groß Dec. 13, 2023, 6:23 a.m. UTC | #2
On 12.12.23 20:10, Julien Grall wrote:
> Hi Juergen,
> 
> On 12/12/2023 09:47, Juergen Gross wrote:
>> Add another function level in spinlock.c, hiding the spinlock_t layout
>> from the low-level locking code.
>>
>> This is done in preparation for introducing rspinlock_t for recursive
>> locks without having to duplicate all of the locking code.
> 
> So all the fields you pass are the ones from spinlock.
> 
> Looking at pahole after this series is applied, we have:
> 
> [pahole output snipped]
> 
> I think we could embed spinlock_t in rspinlock_t without increasing the
> size of rspinlock_t. Have you considered it? This could reduce the number
> of function levels introduced in this series.

That was the layout in the first version of this series. Jan requested to change
it to the current layout [1].


Juergen

[1]: https://lists.xen.org/archives/html/xen-devel/2022-12/msg01054.html
Julien Grall Dec. 13, 2023, 8:43 a.m. UTC | #3
Hi Juergen,

On 13/12/2023 06:23, Juergen Gross wrote:
> On 12.12.23 20:10, Julien Grall wrote:
>> Hi Juergen,
>>
>> On 12/12/2023 09:47, Juergen Gross wrote:
>>> Add another function level in spinlock.c, hiding the spinlock_t layout
>>> from the low-level locking code.
>>>
>>> This is done in preparation for introducing rspinlock_t for recursive
>>> locks without having to duplicate all of the locking code.
>>
>> So all the fields you pass are the ones from spinlock.
>>
>> Looking at pahole after this series is applied, we have:
>>
>> [pahole output snipped]
>>
>> I think we could embed spinlock_t in rspinlock_t without increasing the
>> size of rspinlock_t. Have you considered it? This could reduce the number
>> of function levels introduced in this series.
> 
> That was the layout in the first version of this series. Jan requested 
> to change
> it to the current layout [1].

Ah... Looking through the reasoning, I have to disagree with Jan's
argumentation. At least with the full series applied, there is no increase
in the size of rspinlock_t in debug builds (if we compare to the version
you provided in this series).

Furthermore, this is going to remove at least patches #6 and #8. We would
still need the nrspinlock_* helpers, but they can just be wrappers around
spin_barrier(&lock->lock).
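
E.g. (an untested sketch; "nrspin_barrier" is a made-up name here, assuming
the embedded spinlock_t member is called "lock"):

    static inline void nrspin_barrier(rspinlock_t *lock)
    {
        spin_barrier(&lock->lock);
    }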

This should also solve his concern about unwieldy code:

 > +    spin_barrier(&p2m->pod.lock.lock.lock);

Cheers,
Jürgen Groß Dec. 13, 2023, 9:17 a.m. UTC | #4
On 13.12.23 09:43, Julien Grall wrote:
> Hi Juergen,
> 
> On 13/12/2023 06:23, Juergen Gross wrote:
>> On 12.12.23 20:10, Julien Grall wrote:
>>> Hi Juergen,
>>>
>>> On 12/12/2023 09:47, Juergen Gross wrote:
>>>> Add another function level in spinlock.c, hiding the spinlock_t layout
>>>> from the low-level locking code.
>>>>
>>>> This is done in preparation for introducing rspinlock_t for recursive
>>>> locks without having to duplicate all of the locking code.
>>>
>>> So all the fields you pass are the ones from spinlock.
>>>
>>> Looking at pahole after this series is applied, we have:
>>>
>>> [pahole output snipped]
>>>
>>> I think we could embed spinlock_t in rspinlock_t without increasing the
>>> size of rspinlock_t. Have you considered it? This could reduce the number
>>> of function levels introduced in this series.
>>
>> That was the layout in the first version of this series. Jan requested to change
>> it to the current layout [1].
> 
> Ah... Looking through the reasoning, I have to disagree with Jan's argumentation.

I would _really_ have liked you to mention this disagreement back then (you've
been on Cc: in the thread, too).

Letting me do a major rework and then after 2 more iterations of the series
requesting to undo most of the work isn't great.

> At least with the full series applied, there is no increase in the size of
> rspinlock_t in debug builds (if we compare to the version you provided in
> this series).

That wasn't his sole reasoning, right?

> Furthermore, this is going to remove at least patches #6 and #8. We would
> still need the nrspinlock_* helpers, but they can just be wrappers around
> spin_barrier(&lock->lock).
> 
> This should also solve his concern about unwieldy code:
> 
>  > +    spin_barrier(&p2m->pod.lock.lock.lock);

Yes, but the demand to have optional fields at the end of the struct isn't
covered by your request.


Juergen
Julien Grall Dec. 13, 2023, 9:48 a.m. UTC | #5
On 13/12/2023 09:17, Juergen Gross wrote:
> On 13.12.23 09:43, Julien Grall wrote:
>> Hi Juergen,
>>
>> On 13/12/2023 06:23, Juergen Gross wrote:
>>> On 12.12.23 20:10, Julien Grall wrote:
>>>> Hi Juergen,
>>>>
>>>> On 12/12/2023 09:47, Juergen Gross wrote:
>>>>> Add another function level in spinlock.c, hiding the spinlock_t layout
>>>>> from the low-level locking code.
>>>>>
>>>>> This is done in preparation for introducing rspinlock_t for recursive
>>>>> locks without having to duplicate all of the locking code.
>>>>
>>>> So all the fields you pass are the ones from spinlock.
>>>>
>>>> Looking at pahole after this series is applied, we have:
>>>>
>>>> [pahole output snipped]
>>>>
>>>> I think we could embed spinlock_t in rspinlock_t without increasing the
>>>> size of rspinlock_t. Have you considered it? This could reduce the number
>>>> of function levels introduced in this series.
>>>
>>> That was the layout in the first version of this series. Jan 
>>> requested to change
>>> it to the current layout [1].
>>
>> Ah... Looking through the reasoning, I have to disagree with Jan's
>> argumentation.
> 
> I would _really_ have liked you to mention this disagreement back then 
> (you've
> been on Cc: in the thread, too).

Sorry for that. My e-mail backlog is quite large and I can't keep up
with all the series.

> Letting me do a major rework and then after 2 more iterations of the series
> requesting to undo most of the work isn't great.

Indeed. But I note you continued without any additional feedback [1]. If 
you were not sure about the approach suggested by Jan, then why did you 
post two new versions? Shouldn't you have pinged the maintainers to make 
sure there is a consensus?

> 
>> At least with the full series applied, there is no increase in the size
>> of rspinlock_t in debug builds (if we compare to the version you provided
>> in this series).
> 
> That wasn't his sole reasoning, right?

I guess you mean the non-optional fields should always be at the same 
position?

> 
>> Furthermore, this is going to remove at least patches #6 and #8. We
>> would still need the nrspinlock_* helpers, but they can just be wrappers
>> around spin_barrier(&lock->lock).
>>
>> This should also solve his concern about unwieldy code:
>>
>>  > +    spin_barrier(&p2m->pod.lock.lock.lock);
> 
> Yes, but the demand to have optional fields at the end of the struct isn't
> covered by your request.

I note this was a preference, weighed against code duplication. It is
not clear to me whether Jan agrees with this extra work now.

Anyway, I am not against this approach and if this is what Jan much 
prefers then so be it. But I thought I would point out the additional 
complexity which doesn't seem to be worth it.

Cheers,

[1] https://lists.xen.org/archives/html/xen-devel/2022-12/msg01065.html
Jürgen Groß Dec. 13, 2023, 9:55 a.m. UTC | #6
On 13.12.23 10:48, Julien Grall wrote:
> 
> 
> On 13/12/2023 09:17, Juergen Gross wrote:
>> On 13.12.23 09:43, Julien Grall wrote:
>>> Hi Juergen,
>>>
>>> On 13/12/2023 06:23, Juergen Gross wrote:
>>>> On 12.12.23 20:10, Julien Grall wrote:
>>>>> Hi Juergen,
>>>>>
>>>>> On 12/12/2023 09:47, Juergen Gross wrote:
>>>>>> Add another function level in spinlock.c, hiding the spinlock_t layout
>>>>>> from the low-level locking code.
>>>>>>
>>>>>> This is done in preparation for introducing rspinlock_t for recursive
>>>>>> locks without having to duplicate all of the locking code.
>>>>>
>>>>> So all the fields you pass are the ones from spinlock.
>>>>>
>>>>> Looking at pahole after this series is applied, we have:
>>>>>
>>>>> [pahole output snipped]
>>>>>
>>>>> I think we could embed spinlock_t in rspinlock_t without increasing the
>>>>> size of rspinlock_t. Have you considered it? This could reduce the number
>>>>> of function levels introduced in this series.
>>>>
>>>> That was the layout in the first version of this series. Jan requested to 
>>>> change
>>>> it to the current layout [1].
>>>
>>> Ah... Looking through the reasoning, I have to disagree with Jan's argumentation.
>>
>> I would _really_ have liked you to mention this disagreement back then (you've
>> been on Cc: in the thread, too).
> 
> Sorry for that. My e-mail backlog is quite large and I can't keep up with
> all the series.
> 
>> Letting me do a major rework and then after 2 more iterations of the series
>> requesting to undo most of the work isn't great.
> 
> Indeed. But I note you continued without any additional feedback [1]. If you 
> were not sure about the approach suggested by Jan, then why did you post two new 
> versions? Shouldn't you have pinged the maintainers to make sure there is a 
> consensus?

https://lists.xen.org/archives/html/xen-devel/2023-10/msg01221.html

> 
>>
>>> At least with the full series applied, there is no increase in the size
>>> of rspinlock_t in debug builds (if we compare to the version you provided
>>> in this series).
>>
>> That wasn't his sole reasoning, right?
> 
> I guess you mean the non-optional fields should always be at the same position?

Yes.

> 
>>
>>> Furthermore, this is going to remove at least patches #6 and #8. We would
>>> still need the nrspinlock_* helpers, but they can just be wrappers around
>>> spin_barrier(&lock->lock).
>>>
>>> This should also solve his concern about unwieldy code:
>>>
>>>  > +    spin_barrier(&p2m->pod.lock.lock.lock);
>>
>> Yes, but the demand to have optional fields at the end of the struct isn't
>> covered by your request.
> 
> I note this was a preference, weighed against code duplication. It is not
> clear to me whether Jan agrees with this extra work now.
> 
> Anyway, I am not against this approach and if this is what Jan much prefers then 
> so be it. But I thought I would point out the additional complexity which 
> doesn't seem to be worth it.

Thanks for the clarification.

Jan?


Juergen
Jan Beulich Dec. 13, 2023, 10:04 a.m. UTC | #7
On 13.12.2023 10:48, Julien Grall wrote:
> On 13/12/2023 09:17, Juergen Gross wrote:
>> On 13.12.23 09:43, Julien Grall wrote:
>>> On 13/12/2023 06:23, Juergen Gross wrote:
>>>> On 12.12.23 20:10, Julien Grall wrote:
>>>>> On 12/12/2023 09:47, Juergen Gross wrote:
>>>>>> Add another function level in spinlock.c, hiding the spinlock_t layout
>>>>>> from the low-level locking code.
>>>>>>
>>>>>> This is done in preparation for introducing rspinlock_t for recursive
>>>>>> locks without having to duplicate all of the locking code.
>>>>>
>>>>> So all the fields you pass are the ones from spinlock.
>>>>>
>>>>> Looking at pahole after this series is applied, we have:
>>>>>
>>>>> [pahole output snipped]
>>>>>
>>>>> I think we could embed spinlock_t in rspinlock_t without increasing the
>>>>> size of rspinlock_t. Have you considered it? This could reduce the number
>>>>> of function levels introduced in this series.
>>>>
>>>> That was the layout in the first version of this series. Jan 
>>>> requested to change
>>>> it to the current layout [1].
>>>
>>> Ah... Looking through the reasoning, I have to disagree with Jan's
>>> argumentation.
>>
>> I would _really_ have liked you to mention this disagreement back then 
>> (you've
>> been on Cc: in the thread, too).
> 
>> Sorry for that. My e-mail backlog is quite large and I can't keep up
>> with all the series.
> 
>> Letting me do a major rework and then after 2 more iterations of the series
>> requesting to undo most of the work isn't great.
> 
> Indeed. But I note you continued without any additional feedback [1]. If 
> you were not sure about the approach suggested by Jan, then why did you 
> post two new versions? Shouldn't you have pinged the maintainers to make 
> sure there is a consensus?

I think this is unfair to Jürgen. We generally use the lazy consensus
model, and hence no replies generally mean consensus. Also note that it
has been very close to a full year between my review comments back then
and now. It has been well over a year since the original posting of the RFC.

That said, I also understand that RFCs in particular receive less
attention, even though this is entirely contrary to their purpose. That's
all the same for me - I hardly ever look at RFCs as long as there are
still non-RFC patches pending review, which in reality means it is close
to impossible to ever look at RFCs.

>>> At least with the full series applied, there is no increase in the size
>>> of rspinlock_t in debug builds (if we compare to the version you provided
>>> in this series).
>>
>> That wasn't his sole reasoning, right?
> 
> I guess you mean the non-optional fields should always be at the same 
> position?

I consider this at least desirable, yes.

>>> Furthermore, this is going to remove at least patches #6 and #8. We
>>> would still need the nrspinlock_* helpers, but they can just be wrappers
>>> around spin_barrier(&lock->lock).
>>>
>>> This should also solve his concern about unwieldy code:
>>>
>>>  > +    spin_barrier(&p2m->pod.lock.lock.lock);
>>
>> Yes, but the demand to have optional fields at the end of the struct isn't
>> covered by your request.
> 
> I note this was a preference, weighed against code duplication. It is
> not clear to me whether Jan agrees with this extra work now.

Well, at the time I said I think "that's a reasonable price to pay",
going on to state "with some de-duplication potential".

> Anyway, I am not against this approach and if this is what Jan much 
> prefers then so be it. But I thought I would point out the additional 
> complexity which doesn't seem to be worth it.

It's not "much", I would say, but it would be really nice if some of the
earlier oddities (like the .lock.lock.lock) went away.

Jan
Jan Beulich Dec. 13, 2023, 10:06 a.m. UTC | #8
On 13.12.2023 10:55, Juergen Gross wrote:
> On 13.12.23 10:48, Julien Grall wrote:
>> I note this was a preference, weighed against code duplication. It is not
>> clear to me whether Jan agrees with this extra work now.
>>
>> Anyway, I am not against this approach and if this is what Jan much prefers then 
>> so be it. But I thought I would point out the additional complexity which 
>> doesn't seem to be worth it.
> 
> Thanks for the clarification.
> 
> Jan?

Just to clarify: while I replied to Julien's mail just a minute ago, I
haven't yet gotten around to looking at the actual patch. That reply of
mine was purely based on what I said earlier on.

Jan
Jan Beulich Feb. 29, 2024, 1:59 p.m. UTC | #9
On 12.12.2023 10:47, Juergen Gross wrote:
> @@ -377,25 +388,25 @@ void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
>      local_irq_restore(flags);
>  }
>  
> +static int always_inline spin_is_locked_common(const spinlock_tickets_t *t)
> +{
> +    return t->head != t->tail;
> +}
> +
>  int _spin_is_locked(const spinlock_t *lock)
>  {
> -    /*
> -     * Recursive locks may be locked by another CPU, yet we return
> -     * "false" here, making this function suitable only for use in
> -     * ASSERT()s and alike.
> -     */
> -    return lock->recurse_cpu == SPINLOCK_NO_CPU
> -           ? lock->tickets.head != lock->tickets.tail
> -           : lock->recurse_cpu == smp_processor_id();
> +    return spin_is_locked_common(&lock->tickets);
>  }

This looks like a functional change. I haven't spotted an adjustment in an
earlier patch that would make the lost case unnecessary, but even if there
were one, the removal thereof would then also want doing there, I think.
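
(For illustration only, a hypothetical sketch - not part of this patch -
of how the recursive case could be kept in an rspinlock-specific helper
built on the new common function:

    int rspin_is_locked(const rspinlock_t *lock)
    {
        /* Preserve the old semantics for recursively held locks. */
        return lock->recurse_cpu == SPINLOCK_NO_CPU
               ? spin_is_locked_common(&lock->tickets)
               : lock->recurse_cpu == smp_processor_id();
    }

assuming rspinlock_t keeps its own tickets field, as in this series.)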

Jan

Patch

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 7d611d3d7d..31d12b1006 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -261,29 +261,31 @@  void spin_debug_disable(void)
 
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 
+#define LOCK_PROFILE_PAR lock->profile
 #define LOCK_PROFILE_REL                                                     \
-    if ( lock->profile )                                                     \
+    if ( profile )                                                           \
     {                                                                        \
-        lock->profile->time_hold += NOW() - lock->profile->time_locked;      \
-        lock->profile->lock_cnt++;                                           \
+        profile->time_hold += NOW() - profile->time_locked;                  \
+        profile->lock_cnt++;                                                 \
     }
 #define LOCK_PROFILE_VAR(var, val)    s_time_t var = (val)
 #define LOCK_PROFILE_BLOCK(var)       var = var ? : NOW()
 #define LOCK_PROFILE_BLKACC(tst, val)                                        \
     if ( tst )                                                               \
     {                                                                        \
-        lock->profile->time_block += lock->profile->time_locked - (val);     \
-        lock->profile->block_cnt++;                                          \
+        profile->time_block += profile->time_locked - (val);                 \
+        profile->block_cnt++;                                                \
     }
 #define LOCK_PROFILE_GOT(val)                                                \
-    if ( lock->profile )                                                     \
+    if ( profile )                                                           \
     {                                                                        \
-        lock->profile->time_locked = NOW();                                  \
+        profile->time_locked = NOW();                                        \
         LOCK_PROFILE_BLKACC(val, val);                                       \
     }
 
 #else
 
+#define LOCK_PROFILE_PAR NULL
 #define LOCK_PROFILE_REL
 #define LOCK_PROFILE_VAR(var, val)
 #define LOCK_PROFILE_BLOCK(var)
@@ -307,17 +309,18 @@  static always_inline uint16_t observe_head(const spinlock_tickets_t *t)
     return read_atomic(&t->head);
 }
 
-static void always_inline spin_lock_common(spinlock_t *lock,
+static void always_inline spin_lock_common(spinlock_tickets_t *t,
+                                           union lock_debug *debug,
+                                           struct lock_profile *profile,
                                            void (*cb)(void *data), void *data)
 {
     spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
     LOCK_PROFILE_VAR(block, 0);
 
-    check_lock(&lock->debug, false);
+    check_lock(debug, false);
     preempt_disable();
-    tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
-                                           tickets.head_tail);
-    while ( tickets.tail != observe_head(&lock->tickets) )
+    tickets.head_tail = arch_fetch_and_add(&t->head_tail, tickets.head_tail);
+    while ( tickets.tail != observe_head(t) )
     {
         LOCK_PROFILE_BLOCK(block);
         if ( cb )
@@ -325,18 +328,19 @@  static void always_inline spin_lock_common(spinlock_t *lock,
         arch_lock_relax();
     }
     arch_lock_acquire_barrier();
-    got_lock(&lock->debug);
+    got_lock(debug);
     LOCK_PROFILE_GOT(block);
 }
 
 void _spin_lock(spinlock_t *lock)
 {
-    spin_lock_common(lock, NULL, NULL);
+    spin_lock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR, NULL,
+                     NULL);
 }
 
 void _spin_lock_cb(spinlock_t *lock, void (*cb)(void *data), void *data)
 {
-    spin_lock_common(lock, cb, data);
+    spin_lock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR, cb, data);
 }
 
 void _spin_lock_irq(spinlock_t *lock)
@@ -355,16 +359,23 @@  unsigned long _spin_lock_irqsave(spinlock_t *lock)
     return flags;
 }
 
-void _spin_unlock(spinlock_t *lock)
+static void always_inline spin_unlock_common(spinlock_tickets_t *t,
+                                             union lock_debug *debug,
+                                             struct lock_profile *profile)
 {
     LOCK_PROFILE_REL;
-    rel_lock(&lock->debug);
+    rel_lock(debug);
     arch_lock_release_barrier();
-    add_sized(&lock->tickets.head, 1);
+    add_sized(&t->head, 1);
     arch_lock_signal();
     preempt_enable();
 }
 
+void _spin_unlock(spinlock_t *lock)
+{
+    spin_unlock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
+}
+
 void _spin_unlock_irq(spinlock_t *lock)
 {
     _spin_unlock(lock);
@@ -377,25 +388,25 @@  void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
     local_irq_restore(flags);
 }
 
+static int always_inline spin_is_locked_common(const spinlock_tickets_t *t)
+{
+    return t->head != t->tail;
+}
+
 int _spin_is_locked(const spinlock_t *lock)
 {
-    /*
-     * Recursive locks may be locked by another CPU, yet we return
-     * "false" here, making this function suitable only for use in
-     * ASSERT()s and alike.
-     */
-    return lock->recurse_cpu == SPINLOCK_NO_CPU
-           ? lock->tickets.head != lock->tickets.tail
-           : lock->recurse_cpu == smp_processor_id();
+    return spin_is_locked_common(&lock->tickets);
 }
 
-int _spin_trylock(spinlock_t *lock)
+static int always_inline spin_trylock_common(spinlock_tickets_t *t,
+                                             union lock_debug *debug,
+                                             struct lock_profile *profile)
 {
     spinlock_tickets_t old, new;
 
     preempt_disable();
-    check_lock(&lock->debug, true);
-    old = observe_lock(&lock->tickets);
+    check_lock(debug, true);
+    old = observe_lock(t);
     if ( old.head != old.tail )
     {
         preempt_enable();
@@ -403,8 +414,7 @@  int _spin_trylock(spinlock_t *lock)
     }
     new = old;
     new.tail++;
-    if ( cmpxchg(&lock->tickets.head_tail,
-                 old.head_tail, new.head_tail) != old.head_tail )
+    if ( cmpxchg(&t->head_tail, old.head_tail, new.head_tail) != old.head_tail )
     {
         preempt_enable();
         return 0;
@@ -413,29 +423,41 @@  int _spin_trylock(spinlock_t *lock)
      * cmpxchg() is a full barrier so no need for an
      * arch_lock_acquire_barrier().
      */
-    got_lock(&lock->debug);
+    got_lock(debug);
     LOCK_PROFILE_GOT(0);
 
     return 1;
 }
 
-void _spin_barrier(spinlock_t *lock)
+int _spin_trylock(spinlock_t *lock)
+{
+    return spin_trylock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
+}
+
+static void always_inline spin_barrier_common(spinlock_tickets_t *t,
+                                              union lock_debug *debug,
+                                              struct lock_profile *profile)
 {
     spinlock_tickets_t sample;
     LOCK_PROFILE_VAR(block, NOW());
 
-    check_barrier(&lock->debug);
+    check_barrier(debug);
     smp_mb();
-    sample = observe_lock(&lock->tickets);
+    sample = observe_lock(t);
     if ( sample.head != sample.tail )
     {
-        while ( observe_head(&lock->tickets) == sample.head )
+        while ( observe_head(t) == sample.head )
             arch_lock_relax();
-        LOCK_PROFILE_BLKACC(lock->profile, block);
+        LOCK_PROFILE_BLKACC(profile, block);
     }
     smp_mb();
 }
 
+void _spin_barrier(spinlock_t *lock)
+{
+    spin_barrier_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
+}
+
 int rspin_trylock(rspinlock_t *lock)
 {
     unsigned int cpu = smp_processor_id();
@@ -448,7 +470,8 @@  int rspin_trylock(rspinlock_t *lock)
 
     if ( likely(lock->recurse_cpu != cpu) )
     {
-        if ( !spin_trylock(lock) )
+        if ( !spin_trylock_common(&lock->tickets, &lock->debug,
+                                  LOCK_PROFILE_PAR) )
             return 0;
         lock->recurse_cpu = cpu;
     }
@@ -466,7 +489,8 @@  void rspin_lock(rspinlock_t *lock)
 
     if ( likely(lock->recurse_cpu != cpu) )
     {
-        _spin_lock(lock);
+        spin_lock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR, NULL,
+                         NULL);
         lock->recurse_cpu = cpu;
     }
 
@@ -490,7 +514,7 @@  void rspin_unlock(rspinlock_t *lock)
     if ( likely(--lock->recurse_cnt == 0) )
     {
         lock->recurse_cpu = SPINLOCK_NO_CPU;
-        spin_unlock(lock);
+        spin_unlock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
     }
 }
 
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 82ef99d3b6..d6f4b66613 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -163,6 +163,7 @@  extern void cf_check spinlock_profile_reset(unsigned char key);
 #else
 
 struct lock_profile_qhead { };
+struct lock_profile { };
 
 #define SPIN_LOCK_UNLOCKED {                                                  \
     .recurse_cpu = SPINLOCK_NO_CPU,                                           \