diff mbox series

[1/4] coresight: tmc-etr: Advance buffer pointer in sync buffer.

Message ID 20210421120413.3110775-2-daniel.kiss@arm.com (mailing list archive)
State New, archived
Headers show
Series coresight: Add ETR-PERF polling. | expand

Commit Message

Daniel Kiss April 21, 2021, 12:04 p.m. UTC
With polling the sync could called multiple times in a row. Without this
change the data will be overwritten at the beginning of the buffer.

Signed-off-by: Daniel Kiss <daniel.kiss@arm.com>
Signed-off-by: Branislav Rankov <Branislav.Rankov@arm.com>
---
 drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Comments

Leo Yan April 23, 2021, 8:23 a.m. UTC | #1
On Wed, Apr 21, 2021 at 02:04:10PM +0200, Daniel Kiss wrote:
> With polling the sync could called multiple times in a row. Without this
> change the data will be overwritten at the beginning of the buffer.
> 
> Signed-off-by: Daniel Kiss <daniel.kiss@arm.com>
> Signed-off-by: Branislav Rankov <Branislav.Rankov@arm.com>

From the last week, I spent time to dig into AUX trace buffer, thus
this patch set gives me a good chance to apply my learning for perf
ring buffer.

For this patch:

Reviewed-by: Leo Yan <leo.yan@linaro.org>


> ---
>  drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index ea5027e397d02..dd19d1d1c3b38 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -1442,7 +1442,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>  {
>  	long bytes;
>  	long pg_idx, pg_offset;
> -	unsigned long head = etr_perf->head;
> +	unsigned long head;
>  	char **dst_pages, *src_buf;
>  	struct etr_buf *etr_buf = etr_perf->etr_buf;
>  
> @@ -1465,7 +1465,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>  		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
>  					     &src_buf);
>  		if (WARN_ON_ONCE(bytes <= 0))
> -			break;
> +			return;
>  		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
>  
>  		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
> @@ -1483,6 +1483,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>  		/* Move source pointers */
>  		src_offset += bytes;
>  	}
> +	etr_perf->head = (pg_idx << PAGE_SHIFT) + pg_offset;
>  }
>  
>  /*
> -- 
> 2.25.1
>
Suzuki K Poulose April 26, 2021, 10:40 a.m. UTC | #2
On 21/04/2021 13:04, Daniel Kiss wrote:
> With polling the sync could called multiple times in a row. Without this
> change the data will be overwritten at the beginning of the buffer.
> 
> Signed-off-by: Daniel Kiss <daniel.kiss@arm.com>
> Signed-off-by: Branislav Rankov <Branislav.Rankov@arm.com>
> ---
>   drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 +++--
>   1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index ea5027e397d02..dd19d1d1c3b38 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -1442,7 +1442,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>   {
>   	long bytes;
>   	long pg_idx, pg_offset;
> -	unsigned long head = etr_perf->head;
> +	unsigned long head;
>   	char **dst_pages, *src_buf;
>   	struct etr_buf *etr_buf = etr_perf->etr_buf;
>   
> @@ -1465,7 +1465,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>   		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
>   					     &src_buf);
>   		if (WARN_ON_ONCE(bytes <= 0))
> -			break;
> +			return;
>   		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
>   
>   		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
> @@ -1483,6 +1483,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>   		/* Move source pointers */
>   		src_offset += bytes;
>   	}
> +	etr_perf->head = (pg_idx << PAGE_SHIFT) + pg_offset;


Looking at this patch, I feel the driver is doing a couple wrong things
already.

1) We initialise etr_perf->head every time the ETR enable is called, 
irrespective of whether we actually try to enable the Hardware. e.g,

etm_0 on -> .. -> enable_etr :
etr_perf->head = <head of the handle_0>
   enable_hw()

emt_1 on -> ... -> enable_etr:
   etr_perf->head = <head of the handle_1>
   already_enabled, skip enable_hw()

etm_2 on -> ... -> enable_etr:
   etr_perf->head = <head of the handle_2>
   already_enable, skip enable_hw()...


This doesn't look correct as we don't know which handle is going to get 
the data. This looks pointless.

2) Even more problematic is where we copy the AUX buffer content to.
As mentioned above, we don't know which handle is going to be the last
one to consume and we have a "etr_perf->head" that came from one of the 
handles and the "pages" that came from the first handle which created a
etr_perf buffer. In sync_perf_buffer() we copy the hardware buffers to
the "pages" (say of handle_0) with "etr_perf->head" (which could be from
any other handle, say handle_2) and then we could return the number of 
bytes copied, which then is used to update the last handle (could be say 
handle_3), where there is no actual data copied.

To fix all of these issues, we must
1) Stop using etr_perf->head, and instead use the handle->head where we 
are called update_buffer on.

2) Keep track of the "pages" that belong to a given "handle" and then 
use those pages to copy the data to the current handle we are called to 
update the buffer on.

Did I get this wrong ?

Suzuki
Leo Yan April 27, 2021, 3:45 a.m. UTC | #3
On Mon, Apr 26, 2021 at 11:40:44AM +0100, Suzuki Kuruppassery Poulose wrote:

[...]

> > @@ -1442,7 +1442,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
> >   {
> >   	long bytes;
> >   	long pg_idx, pg_offset;
> > -	unsigned long head = etr_perf->head;
> > +	unsigned long head;
> >   	char **dst_pages, *src_buf;
> >   	struct etr_buf *etr_buf = etr_perf->etr_buf;
> > @@ -1465,7 +1465,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
> >   		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
> >   					     &src_buf);
> >   		if (WARN_ON_ONCE(bytes <= 0))
> > -			break;
> > +			return;
> >   		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
> >   		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
> > @@ -1483,6 +1483,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
> >   		/* Move source pointers */
> >   		src_offset += bytes;
> >   	}
> > +	etr_perf->head = (pg_idx << PAGE_SHIFT) + pg_offset;
> 
> 
> Looking at this patch, I feel the driver is doing a couple wrong things
> already.
> 
> 1) We initialise etr_perf->head every time the ETR enable is called,
> irrespective of whether we actually try to enable the Hardware. e.g,
> 
> etm_0 on -> .. -> enable_etr :
> etr_perf->head = <head of the handle_0>
>   enable_hw()
> 
> emt_1 on -> ... -> enable_etr:
>   etr_perf->head = <head of the handle_1>
>   already_enabled, skip enable_hw()
> 
> etm_2 on -> ... -> enable_etr:
>   etr_perf->head = <head of the handle_2>
>   already_enable, skip enable_hw()...
> 
> 
> This doesn't look correct as we don't know which handle is going to get the
> data. This looks pointless.

I'd like to convert mapping into below diagram (for system wide trace):

  CPU0: AUX RB (perf_output_handle_0) -> etr_perf ->  +---------+
  CPU1: AUX RB (perf_output_handle_1) -> etr_perf ->  | etr_buf |
  CPU2: AUX RB (perf_output_handle_2) -> etr_perf ->  |         |
  CPU3: AUX RB (perf_output_handle_3) -> etr_perf ->  +---------+

Simply to say, there have two layers for controlling ring buffer, one
layer is for perf AUX ring buffer, it mainly uses the structure
perf_output_handle to manage the ring buffer.  And in the ETR driver,
it uses structure etr_perf to manage the header pointer for copying
data into ETR buffer (tagged as "etr_buf").

ETR buffer is the single one, but the structures "perf_output_handle"
and "etr_perf" are per CPU.  We have multiple copies for the headers and
tails to manage a single buffer, but the problem is these multiple
copies have not been synced with each other.

> 2) Even more problematic is where we copy the AUX buffer content to.
> As mentioned above, we don't know which handle is going to be the last
> one to consume and we have a "etr_perf->head" that came from one of the
> handles and the "pages" that came from the first handle which created a
> etr_perf buffer. In sync_perf_buffer() we copy the hardware buffers to
> the "pages" (say of handle_0) with "etr_perf->head" (which could be from
> any other handle, say handle_2) and then we could return the number of bytes
> copied, which then is used to update the last handle (could be say
> handle_3), where there is no actual data copied.
> 
> To fix all of these issues, we must
> 1) Stop using etr_perf->head, and instead use the handle->head where we are
> called update_buffer on.
> 
> 2) Keep track of the "pages" that belong to a given "handle" and then use
> those pages to copy the data to the current handle we are called to update
> the buffer on.

The "pages" are only allocated once, even they are attached to multiple
handles.  I think the right way is to use the single structure
"etr_perf" and single "perf_output_handle" to manage the "pages", IOW,
if there have single buffer, then we just use one copy of header and
tail to manage it.

The difficult thing is how to use the single one "perf_output_handle"
to manage the AUX ring buffer and notify to user space.  I am
wandering if we can only use CPU0's perf_output_handle to manage the
AUX ring buffer, if any other CPUs read out the data, they always use
CPU0's perf_output_handle.

Thanks,
Leo
Suzuki K Poulose April 27, 2021, 10 a.m. UTC | #4
On 27/04/2021 04:45, Leo Yan wrote:
> On Mon, Apr 26, 2021 at 11:40:44AM +0100, Suzuki Kuruppassery Poulose wrote:
> 
> [...]
> 
>>> @@ -1442,7 +1442,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>>>    {
>>>    	long bytes;
>>>    	long pg_idx, pg_offset;
>>> -	unsigned long head = etr_perf->head;
>>> +	unsigned long head;
>>>    	char **dst_pages, *src_buf;
>>>    	struct etr_buf *etr_buf = etr_perf->etr_buf;
>>> @@ -1465,7 +1465,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>>>    		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
>>>    					     &src_buf);
>>>    		if (WARN_ON_ONCE(bytes <= 0))
>>> -			break;
>>> +			return;
>>>    		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
>>>    		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
>>> @@ -1483,6 +1483,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
>>>    		/* Move source pointers */
>>>    		src_offset += bytes;
>>>    	}
>>> +	etr_perf->head = (pg_idx << PAGE_SHIFT) + pg_offset;
>>
>>
>> Looking at this patch, I feel the driver is doing a couple wrong things
>> already.
>>
>> 1) We initialise etr_perf->head every time the ETR enable is called,
>> irrespective of whether we actually try to enable the Hardware. e.g,
>>
>> etm_0 on -> .. -> enable_etr :
>> etr_perf->head = <head of the handle_0>
>>    enable_hw()
>>
>> emt_1 on -> ... -> enable_etr:
>>    etr_perf->head = <head of the handle_1>
>>    already_enabled, skip enable_hw()
>>
>> etm_2 on -> ... -> enable_etr:
>>    etr_perf->head = <head of the handle_2>
>>    already_enable, skip enable_hw()...
>>
>>
>> This doesn't look correct as we don't know which handle is going to get the
>> data. This looks pointless.
> 
> I'd like to convert mapping into below diagram (for system wide trace):
> 
>    CPU0: AUX RB (perf_output_handle_0) -> etr_perf ->  +---------+
>    CPU1: AUX RB (perf_output_handle_1) -> etr_perf ->  | etr_buf |
>    CPU2: AUX RB (perf_output_handle_2) -> etr_perf ->  |         |
>    CPU3: AUX RB (perf_output_handle_3) -> etr_perf ->  +---------+ >

To make it more clear:

     CPU0: AUX RB (perf_output_handle_0) -> etr_perf0 ->  +---------+
     CPU1: AUX RB (perf_output_handle_1) -> etr_perf1 ->  |etr_buf0 |
     CPU2: AUX RB (perf_output_handle_2) -> etr_perf2 ->  |         |
     CPU3: AUX RB (perf_output_handle_3) -> etr_perf3 ->  +---------+


> Simply to say, there have two layers for controlling ring buffer, one
> layer is for perf AUX ring buffer, it mainly uses the structure
> perf_output_handle to manage the ring buffer.  And in the ETR driver,
> it uses structure etr_perf to manage the header pointer for copying
> data into ETR buffer (tagged as "etr_buf").
> 
> ETR buffer is the single one, but the structures "perf_output_handle"
> and "etr_perf" are per CPU.  We have multiple copies for the headers and

minor Correction, they are "per-event" to be precise. And there are 
events per-CPU in a system wide mode or task mode (but not per-thread 
mode). So, you are correct

> tails to manage a single buffer, but the problem is these multiple
> copies have not been synced with each other.
> 
>> 2) Even more problematic is where we copy the AUX buffer content to.
>> As mentioned above, we don't know which handle is going to be the last
>> one to consume and we have a "etr_perf->head" that came from one of the
>> handles and the "pages" that came from the first handle which created a
>> etr_perf buffer. In sync_perf_buffer() we copy the hardware buffers to
>> the "pages" (say of handle_0) with "etr_perf->head" (which could be from
>> any other handle, say handle_2) and then we could return the number of bytes
>> copied, which then is used to update the last handle (could be say
>> handle_3), where there is no actual data copied.

This is not valid and am relieved that the driver is correct. The 
assumption that there is only one etr_perf per ETR is incorrect as
pictured above.

>>
>> To fix all of these issues, we must
>> 1) Stop using etr_perf->head, and instead use the handle->head where we are
>> called update_buffer on.
>>
>> 2) Keep track of the "pages" that belong to a given "handle" and then use
>> those pages to copy the data to the current handle we are called to update
>> the buffer on.
> 
> The "pages" are only allocated once, even they are attached to multiple
> handles.  I think the right way is to use the single structure

I assume you mean the pages in the etr_buf and not etr_perf right ?

> "etr_perf" and single "perf_output_handle" to manage the "pages", IOW,
> if there have single buffer, then we just use one copy of header and
> tail to manage it.

I think this is not needed and the way we do things are fine and the 
patch as such looks correct to me.

The perf_output_handle is per-event and nothing that we can combine 
with. etr_perf captures what the "ouput_handle" stands for and is 
something necessary for syncing the buffer.

Now coming back to this patch, I understand that the sync_perf could be 
called with the polling patches multiple times. But don't we do a
perf_output_handle_end() each of the time we wake up ? (I haven't looked
at the later patches yet).

I would expect:

   perf_aux_output_begin() -> update the etr_perf-> head

   when we sync the buffer, we do :

  Poll-> sync_buffer-> perf_aux_output_end() and perf_aux_output_begin() 
-> update etr_perf->head.


Kind regards
Suzuki
Leo Yan April 28, 2021, 2:34 a.m. UTC | #5
On Tue, Apr 27, 2021 at 11:00:51AM +0100, Suzuki Kuruppassery Poulose wrote:

[...]

> To make it more clear:
> 
>     CPU0: AUX RB (perf_output_handle_0) -> etr_perf0 ->  +---------+
>     CPU1: AUX RB (perf_output_handle_1) -> etr_perf1 ->  |etr_buf0 |
>     CPU2: AUX RB (perf_output_handle_2) -> etr_perf2 ->  |         |
>     CPU3: AUX RB (perf_output_handle_3) -> etr_perf3 ->  +---------+

Thanks for the clarification.

> > Simply to say, there have two layers for controlling ring buffer, one
> > layer is for perf AUX ring buffer, it mainly uses the structure
> > perf_output_handle to manage the ring buffer.  And in the ETR driver,
> > it uses structure etr_perf to manage the header pointer for copying
> > data into ETR buffer (tagged as "etr_buf").
> > 
> > ETR buffer is the single one, but the structures "perf_output_handle"
> > and "etr_perf" are per CPU.  We have multiple copies for the headers and
> 
> minor Correction, they are "per-event" to be precise. And there are events
> per-CPU in a system wide mode or task mode (but not per-thread mode). So,
> you are correct
> 
> > tails to manage a single buffer, but the problem is these multiple
> > copies have not been synced with each other.
> > 
> > > 2) Even more problematic is where we copy the AUX buffer content to.
> > > As mentioned above, we don't know which handle is going to be the last
> > > one to consume and we have a "etr_perf->head" that came from one of the
> > > handles and the "pages" that came from the first handle which created a
> > > etr_perf buffer. In sync_perf_buffer() we copy the hardware buffers to
> > > the "pages" (say of handle_0) with "etr_perf->head" (which could be from
> > > any other handle, say handle_2) and then we could return the number of bytes
> > > copied, which then is used to update the last handle (could be say
> > > handle_3), where there is no actual data copied.
> 
> This is not valid and am relieved that the driver is correct. The assumption
> that there is only one etr_perf per ETR is incorrect as
> pictured above.

Yeah, etr_perf is per event wise.

> > > To fix all of these issues, we must
> > > 1) Stop using etr_perf->head, and instead use the handle->head where we are
> > > called update_buffer on.
> > > 
> > > 2) Keep track of the "pages" that belong to a given "handle" and then use
> > > those pages to copy the data to the current handle we are called to update
> > > the buffer on.
> > 
> > The "pages" are only allocated once, even they are attached to multiple
> > handles.  I think the right way is to use the single structure
> 
> I assume you mean the pages in the etr_buf and not etr_perf right ?

Yes.

> > "etr_perf" and single "perf_output_handle" to manage the "pages", IOW,
> > if there have single buffer, then we just use one copy of header and
> > tail to manage it.
> 
> I think this is not needed and the way we do things are fine and the patch
> as such looks correct to me.
> 
> The perf_output_handle is per-event and nothing that we can combine with.
> etr_perf captures what the "ouput_handle" stands for and is something
> necessary for syncing the buffer.

Correct myself for one thing.  At beginning I wrongly understood the AUX
buffer is only allocated once but mapped into VMA for multiple times for all
events, but this is wrong.  In the file kernel/events/ring_buffer.c,
function rb_alloc_aux() clearly shows the AUX ring buffer is allocated
for every event.  So the buffer management is as below:

        +---------+
  CPU0: | AUX RB0 | (perf_output_handle_0) -> etr_perf0 ->  +---------+
        +---------+                                         |         |
  CPU1: | AUX RB1 | (perf_output_handle_1) -> etr_perf1 ->  |etr_buf0 |
        +---------+                                         |         |
  CPU2: | AUX RB2 | (perf_output_handle_2) -> etr_perf2 ->  |         |
        +---------+                                         |         |
  CPU3: | AUX RB3 | (perf_output_handle_3) -> etr_perf3 ->  +---------+
        +---------+                                              |
             ^---------------------------------------------------/
                           tmc_etr_sync_perf_buffer()

> Now coming back to this patch, I understand that the sync_perf could be
> called with the polling patches multiple times. But don't we do a
> perf_output_handle_end() each of the time we wake up ? (I haven't looked
> at the later patches yet).

Here "we" means the polling's work (or we can say the polling thread).
From my understanding for the patch of implementation the polling, it
adds the delayed work on work queue periodically, every time the work
resets the ETR and sync the trace data from etr_buf0 to AUX ring
buffer.  The target AUX ring buffer is fixed, it should be the first
registered AUX ring buffer when launch the session.

So the flow is:

  etr_perf_polling_worker()
    sink_ops(sink)->update_buffer()
      tmc_etr_sync_perf_buffer() -> Update 'etr_perf->head'
    tmc_etr_reset
    perf_aux_output_end(handle, size)  -> Update AUX ring buffer head
                                          and notify the perf tool
    perf_aux_output_begin()            -> Prepare for next recording

> I would expect:
> 
>   perf_aux_output_begin() -> update the etr_perf-> head
> 
>   when we sync the buffer, we do :
> 
>  Poll-> sync_buffer-> perf_aux_output_end() and perf_aux_output_begin() ->
> update etr_perf->head.

I understand your meaning, essentially "etr_perf->head" should keep
up the latest value of the AUX ring buffer's head.  So I think below
change is what you are proposing:

diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 8e8596dc75fa..daeeda00236e 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1567,6 +1567,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev,
 	/* Insert barrier packets at the beginning, if there was an overflow */
 	if (lost)
 		tmc_etr_buf_insert_barrier_packet(etr_buf, offset);
+	etr_perf->head = handle->head;
 	tmc_etr_sync_perf_buffer(etr_perf, offset, size);
 
 	/*

Thanks,
Leo
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index ea5027e397d02..dd19d1d1c3b38 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1442,7 +1442,7 @@  static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
 {
 	long bytes;
 	long pg_idx, pg_offset;
-	unsigned long head = etr_perf->head;
+	unsigned long head;
 	char **dst_pages, *src_buf;
 	struct etr_buf *etr_buf = etr_perf->etr_buf;
 
@@ -1465,7 +1465,7 @@  static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
 		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
 					     &src_buf);
 		if (WARN_ON_ONCE(bytes <= 0))
-			break;
+			return;
 		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
 
 		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
@@ -1483,6 +1483,7 @@  static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
 		/* Move source pointers */
 		src_offset += bytes;
 	}
+	etr_perf->head = (pg_idx << PAGE_SHIFT) + pg_offset;
 }
 
 /*